//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;
using namespace MIPatternMatch;

/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
/// with any leftover piece as type \p LeftoverTy.
///
/// Returns -1 in the first element of the pair if the breakdown is not
/// satisfiable.
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  assert(Size > NarrowSize);

  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (NarrowTy.isVector()) {
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}
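
// Illustrative example (not from the original source): breaking down
// OrigTy = s100 with NarrowTy = s32 gives NumParts = 3 with 4 bits left
// over, so LeftoverTy is set to s4 and {3, 1} is returned. An exact split
// such as OrigTy = s96 with NarrowTy = s32 returns {3, 0}.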

static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
  if (!Ty.isScalar())
    return nullptr;

  switch (Ty.getSizeInBits()) {
  case 16:
    return Type::getHalfTy(Ctx);
  case 32:
    return Type::getFloatTy(Ctx);
  case 64:
    return Type::getDoubleTy(Ctx);
  case 80:
    return Type::getX86_FP80Ty(Ctx);
  case 128:
    return Type::getFP128Ty(Ctx);
  default:
    return nullptr;
  }
}

LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) {}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()) {}

LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
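
// The action returned by LI.getAction above comes from the target's
// LegalizerInfo rules. As a hypothetical sketch (not from this file), a
// target could steer small additions into the WidenScalar case with rules
// like:
//
//   getActionDefinitionsBuilder(G_ADD)
//       .legalFor({s32, s64})
//       .widenScalarToNextPow2(0)
//       .clampScalar(0, s32, s64);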

void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  // Perform irregular split. Leftover is last element of RegPieces.
  if (MainTy.isVector()) {
    SmallVector<Register, 8> RegPieces;
    extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
    for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
      VRegs.push_back(RegPieces[i]);
    LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
    LeftoverTy = MRI.getType(LeftoverRegs[0]);
    return true;
  }

  LeftoverTy = LLT::scalar(LeftoverSize);
  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}

void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
                                         SmallVectorImpl<Register> &VRegs) {
  LLT RegTy = MRI.getType(Reg);
  assert(RegTy.isVector() && "Expected a vector type");

  LLT EltTy = RegTy.getElementType();
  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
  unsigned RegNumElts = RegTy.getNumElements();
  unsigned LeftoverNumElts = RegNumElts % NumElts;
  unsigned NumNarrowTyPieces = RegNumElts / NumElts;

  // Perfect split without leftover.
  if (LeftoverNumElts == 0)
    return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);

  // Irregular split. Provide direct access to all elements for the artifact
  // combiner by unmerging to individual elements. Then build vectors with
  // NarrowTy's NumElts elements; the remaining element(s) are used to build
  // the leftover vector.
  SmallVector<Register, 8> Elts;
  extractParts(Reg, EltTy, RegNumElts, Elts);

  unsigned Offset = 0;
  // Requested sub-vectors of NarrowTy.
  for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
    ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
    VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
  }

  // Leftover element(s).
  if (LeftoverNumElts == 1) {
    VRegs.push_back(Elts[Offset]);
  } else {
    LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
    ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
    VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0));
  }
}
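
// Illustrative example (assumed values, not from the original source):
// splitting Reg = <5 x s32> with NumElts = 2 unmerges all five elements,
// builds two <2 x s32> sub-vectors from elements {0,1} and {2,3}, and pushes
// the single remaining s32 element as the leftover, so VRegs ends up as
// { <2 x s32>, <2 x s32>, s32 }.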

void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  // Merge sub-vectors with different number of elements and insert into
  // DstReg.
  if (ResultTy.isVector()) {
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    SmallVector<Register, 8> AllRegs;
    for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
      AllRegs.push_back(Reg);
    return mergeMixedSubvectors(DstReg, AllRegs);
  }

  SmallVector<Register> GCDRegs;
  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}

void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
                                       Register Reg) {
  LLT Ty = MRI.getType(Reg);
  SmallVector<Register, 8> RegElts;
  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
  Elts.append(RegElts);
}

/// Merge \p PartRegs with different types into \p DstReg.
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
                                           ArrayRef<Register> PartRegs) {
  SmallVector<Register, 8> AllElts;
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  Register Leftover = PartRegs[PartRegs.size() - 1];
  if (MRI.getType(Leftover).isScalar())
    AllElts.push_back(Leftover);
  else
    appendVectorElts(AllElts, Leftover);

  MIRBuilder.buildMerge(DstReg, AllElts);
}

/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}

void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                     LLT GCDTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy == GCDTy) {
    // If the source already evenly divides the result type, we don't need to
    // do anything.
    Parts.push_back(SrcReg);
  } else {
    // Need to split into common type sized pieces.
    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    getUnmergeResults(Parts, *Unmerge);
  }
}

LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                    LLT DstTy, LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
  extractGCDType(Parts, GCDTy, SrcReg);
  return GCDTy;
}
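
// Illustrative example (assumed types): with SrcTy = s96, NarrowTy = s24 and
// DstTy = s64, the common type is getGCDType(getGCDType(s96, s24), s64) =
// getGCDType(s24, s64) = s8, so the s96 source is unmerged into twelve s8
// pieces and s8 is returned.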

LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't
  // evenly cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can
  // reuse the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding
      // additional copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, reuse the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}
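
// Illustrative example (assumed values, continuing the s8/s24/s64 case
// above): LCMTy = getLCMType(s64, s24) = s192, so NumParts = 8 and
// NumSubParts = 3, i.e. 24 s8 slots in total. If only eight original s8
// pieces exist, the remaining sixteen slots are filled with PadReg: a zero
// constant for G_ZEXT, an undef for G_ANYEXT, or the sign-replicating
// G_ASHR result for G_SEXT.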

void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits
  // into the result.
  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    MIRBuilder.buildUnmerge(UnmergeDefs,
                            MIRBuilder.buildMerge(LCMTy, RemergeRegs));
    return;
  }

  llvm_unreachable("unhandled case");
}

static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}
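
// As a concrete expansion of the macros above: getRTLibDesc(G_FADD, 32)
// reaches RTLIBCASE(ADD_F) and returns RTLIB::ADD_F32, while
// getRTLibDesc(G_SDIV, 64) reaches RTLIBCASE_INT(SDIV_I) and returns
// RTLIB::SDIV_I64.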

/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(MachineInstr &MI,
                                    const TargetInstrInfo &TII,
                                    MachineRegisterInfo &MRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return or if we
  // have a `thisreturn` callee, and a sequence like:
  //
  //   G_MEMCPY %0, %1, %2
  //   $x0 = COPY %0
  //   RET_ReallyLR implicit $x0
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next != MBB.instr_end() && Next->isCopy()) {
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("unsupported opcode");
    case TargetOpcode::G_BZERO:
      return false;
    case TargetOpcode::G_MEMCPY:
    case TargetOpcode::G_MEMMOVE:
    case TargetOpcode::G_MEMSET:
      break;
    }

    Register VReg = MI.getOperand(0).getReg();
    if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
      return false;

    Register PReg = Next->getOperand(0).getReg();
    if (!PReg.isPhysical())
      return false;

    auto Ret = next_nodbg(Next, MBB.instr_end());
    if (Ret == MBB.instr_end() || !Ret->isReturn())
      return false;

    if (Ret->getNumImplicitOperands() != 1)
      return false;

    if (PReg != Ret->getOperand(0).getReg())
      return false;

    // Skip over the COPY that we just validated.
    Next = Ret;
  }

  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);
  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
  return createLibcall(MIRBuilder, Name, Result, Args, CC);
}

// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // FIXME: What does the original arg index mean here?
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
    Args.push_back({MO.getReg(), OpType, 0});
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), OpType, 0}, Args);
}
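
// For example, a G_FREM on s64 operands reaches this helper with
// Libcall = RTLIB::REM_F64, which with the default libcall names resolves to
// the C runtime function fmod(double, double).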

LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
                    isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  return LegalizerHelper::Legalized;
}
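
// Illustrative MIR (assumed operands, not from the original source):
//
//   G_MEMCPY %dst(p0), %src(p0), %len(s64), 1
//
// is lowered here to a memcpy libcall; the trailing immediate requests a
// tail call, which is honored only if isLibCallInTailPosition also agrees.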

static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), ToType, 0},
                       {{MI.getOperand(1).getReg(), FromType, 0}});
}
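
// For instance, a G_FPTOSI from an s32 float to an s32 integer maps through
// RTLIB::getFPTOSINT(MVT::f32, MVT::i32) to the compiler-rt routine
// __fixsfsi (assuming the default libcall names).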
- LegalizerHelper::LegalizeResult
- LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
- LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
- unsigned Size = LLTy.getSizeInBits();
- auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
- switch (MI.getOpcode()) {
- default:
- return UnableToLegalize;
- case TargetOpcode::G_SDIV:
- case TargetOpcode::G_UDIV:
- case TargetOpcode::G_SREM:
- case TargetOpcode::G_UREM:
- case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
- Type *HLTy = IntegerType::get(Ctx, Size);
- auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
- if (Status != Legalized)
- return Status;
- break;
- }
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FPOW:
- case TargetOpcode::G_FREM:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FEXP2:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
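- // Example (illustrative): %0:_(s64) = G_FREM %a, %b typically becomes a
- // call to fmod, and the s32 form a call to fmodf.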
- Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
- if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
- LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
- return UnableToLegalize;
- }
- auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
- if (Status != Legalized)
- return Status;
- break;
- }
- case TargetOpcode::G_FPEXT:
- case TargetOpcode::G_FPTRUNC: {
- Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
- Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
- if (!FromTy || !ToTy)
- return UnableToLegalize;
- LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
- if (Status != Legalized)
- return Status;
- break;
- }
- case TargetOpcode::G_FPTOSI:
- case TargetOpcode::G_FPTOUI: {
- // FIXME: Support other types
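- // Example (illustrative): %0:_(s32) = G_FPTOSI %1:_(s64) selects the
- // f64 -> i32 conversion libcall, typically __fixdfsi.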
- unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
- return UnableToLegalize;
- LegalizeResult Status = conversionLibcall(
- MI, MIRBuilder,
- ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
- FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
- if (Status != Legalized)
- return Status;
- break;
- }
- case TargetOpcode::G_SITOFP:
- case TargetOpcode::G_UITOFP: {
- // FIXME: Support other types
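- // Example (illustrative): %0:_(s64) = G_SITOFP %1:_(s32) selects the
- // i32 -> f64 conversion libcall, typically __floatsidf.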
- unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
- return UnableToLegalize;
- LegalizeResult Status = conversionLibcall(
- MI, MIRBuilder,
- ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
- FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
- if (Status != Legalized)
- return Status;
- break;
- }
- case TargetOpcode::G_BZERO:
- case TargetOpcode::G_MEMCPY:
- case TargetOpcode::G_MEMMOVE:
- case TargetOpcode::G_MEMSET: {
- LegalizeResult Result =
- createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
- if (Result != Legalized)
- return Result;
- MI.eraseFromParent();
- return Result;
- }
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
- unsigned TypeIdx,
- LLT NarrowTy) {
- uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- uint64_t NarrowSize = NarrowTy.getSizeInBits();
- switch (MI.getOpcode()) {
- default:
- return UnableToLegalize;
- case TargetOpcode::G_IMPLICIT_DEF: {
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- // If SizeOp0 is not an exact multiple of NarrowSize, emit
- // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
- // FIXME: Although this would also be legal for the general case, it causes
- // a lot of regressions in the emitted code (superfluous COPYs, artifact
- // combines not being hit). This seems to be a problem related to the
- // artifact combiner.
- if (SizeOp0 % NarrowSize != 0) {
- LLT ImplicitTy = NarrowTy;
- if (DstTy.isVector())
- ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
- Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
- MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
- MI.eraseFromParent();
- return Legalized;
- }
- int NumParts = SizeOp0 / NarrowSize;
- SmallVector<Register, 2> DstRegs;
- for (int i = 0; i < NumParts; ++i)
- DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
- if (DstTy.isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
- MIRBuilder.buildMerge(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_CONSTANT: {
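- // Example (illustrative): narrowing %0:_(s64) = G_CONSTANT i64 C with a
- // s32 NarrowTy emits G_CONSTANT i32 trunc(C) and G_CONSTANT i32 (C >> 32),
- // then recombines the two parts with G_MERGE_VALUES.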
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- const APInt &Val = MI.getOperand(1).getCImm()->getValue();
- unsigned TotalSize = Ty.getSizeInBits();
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- int NumParts = TotalSize / NarrowSize;
- SmallVector<Register, 4> PartRegs;
- for (int I = 0; I != NumParts; ++I) {
- unsigned Offset = I * NarrowSize;
- auto K = MIRBuilder.buildConstant(NarrowTy,
- Val.lshr(Offset).trunc(NarrowSize));
- PartRegs.push_back(K.getReg(0));
- }
- LLT LeftoverTy;
- unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
- SmallVector<Register, 1> LeftoverRegs;
- if (LeftoverBits != 0) {
- LeftoverTy = LLT::scalar(LeftoverBits);
- auto K = MIRBuilder.buildConstant(
- LeftoverTy,
- Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
- LeftoverRegs.push_back(K.getReg(0));
- }
- insertParts(MI.getOperand(0).getReg(),
- Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_SEXT:
- case TargetOpcode::G_ZEXT:
- case TargetOpcode::G_ANYEXT:
- return narrowScalarExt(MI, TypeIdx, NarrowTy);
- case TargetOpcode::G_TRUNC: {
- if (TypeIdx != 1)
- return UnableToLegalize;
- uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
- LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
- return UnableToLegalize;
- }
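- // Example (illustrative): %0:_(s32) = G_TRUNC %1:_(s64) with a s32
- // NarrowTy unmerges %1 into two s32 halves and copies the low half to %0.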
- auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
- MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_FREEZE: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- // Should widen scalar first
- if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
- return UnableToLegalize;
- auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
- SmallVector<Register, 8> Parts;
- for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
- Parts.push_back(
- MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
- }
- MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_ADD:
- case TargetOpcode::G_SUB:
- case TargetOpcode::G_SADDO:
- case TargetOpcode::G_SSUBO:
- case TargetOpcode::G_SADDE:
- case TargetOpcode::G_SSUBE:
- case TargetOpcode::G_UADDO:
- case TargetOpcode::G_USUBO:
- case TargetOpcode::G_UADDE:
- case TargetOpcode::G_USUBE:
- return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
- case TargetOpcode::G_MUL:
- case TargetOpcode::G_UMULH:
- return narrowScalarMul(MI, NarrowTy);
- case TargetOpcode::G_EXTRACT:
- return narrowScalarExtract(MI, TypeIdx, NarrowTy);
- case TargetOpcode::G_INSERT:
- return narrowScalarInsert(MI, TypeIdx, NarrowTy);
- case TargetOpcode::G_LOAD: {
- auto &LoadMI = cast<GLoad>(MI);
- Register DstReg = LoadMI.getDstReg();
- LLT DstTy = MRI.getType(DstReg);
- if (DstTy.isVector())
- return UnableToLegalize;
- if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
- Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
- MIRBuilder.buildAnyExt(DstReg, TmpReg);
- LoadMI.eraseFromParent();
- return Legalized;
- }
- return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
- }
- case TargetOpcode::G_ZEXTLOAD:
- case TargetOpcode::G_SEXTLOAD: {
- auto &LoadMI = cast<GExtLoad>(MI);
- Register DstReg = LoadMI.getDstReg();
- Register PtrReg = LoadMI.getPointerReg();
- Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- auto &MMO = LoadMI.getMMO();
- unsigned MemSize = MMO.getSizeInBits();
- if (MemSize == NarrowSize) {
- MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
- } else if (MemSize < NarrowSize) {
- MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
- } else if (MemSize > NarrowSize) {
- // FIXME: Need to split the load.
- return UnableToLegalize;
- }
- if (isa<GZExtLoad>(LoadMI))
- MIRBuilder.buildZExt(DstReg, TmpReg);
- else
- MIRBuilder.buildSExt(DstReg, TmpReg);
- LoadMI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_STORE: {
- auto &StoreMI = cast<GStore>(MI);
- Register SrcReg = StoreMI.getValueReg();
- LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy.isVector())
- return UnableToLegalize;
- int NumParts = SizeOp0 / NarrowSize;
- unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
- unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
- if (SrcTy.isVector() && LeftoverBits != 0)
- return UnableToLegalize;
- if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
- Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildTrunc(TmpReg, SrcReg);
- MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
- StoreMI.eraseFromParent();
- return Legalized;
- }
- return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
- }
- case TargetOpcode::G_SELECT:
- return narrowScalarSelect(MI, TypeIdx, NarrowTy);
- case TargetOpcode::G_AND:
- case TargetOpcode::G_OR:
- case TargetOpcode::G_XOR: {
- // Legalize bitwise operation:
- // A = BinOp<Ty> B, C
- // into:
- // B1, ..., BN = G_UNMERGE_VALUES B
- // C1, ..., CN = G_UNMERGE_VALUES C
- // A1 = BinOp<Ty/N> B1, C1
- // ...
- // AN = BinOp<Ty/N> BN, CN
- // A = G_MERGE_VALUES A1, ..., AN
- return narrowScalarBasic(MI, TypeIdx, NarrowTy);
- }
- case TargetOpcode::G_SHL:
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_ASHR:
- return narrowScalarShift(MI, TypeIdx, NarrowTy);
- case TargetOpcode::G_CTLZ:
- case TargetOpcode::G_CTLZ_ZERO_UNDEF:
- case TargetOpcode::G_CTTZ:
- case TargetOpcode::G_CTTZ_ZERO_UNDEF:
- case TargetOpcode::G_CTPOP:
- if (TypeIdx == 1)
- switch (MI.getOpcode()) {
- case TargetOpcode::G_CTLZ:
- case TargetOpcode::G_CTLZ_ZERO_UNDEF:
- return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
- case TargetOpcode::G_CTTZ:
- case TargetOpcode::G_CTTZ_ZERO_UNDEF:
- return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
- case TargetOpcode::G_CTPOP:
- return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
- default:
- return UnableToLegalize;
- }
- Observer.changingInstr(MI);
- narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_INTTOPTR:
- if (TypeIdx != 1)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- narrowScalarSrc(MI, NarrowTy, 1);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_PTRTOINT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_PHI: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
- unsigned NumParts = SizeOp0 / NarrowSize;
- SmallVector<Register, 2> DstRegs(NumParts);
- SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
- Observer.changingInstr(MI);
- for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
- MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
- MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
- extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
- SrcRegs[i / 2]);
- }
- MachineBasicBlock &MBB = *MI.getParent();
- MIRBuilder.setInsertPt(MBB, MI);
- for (unsigned i = 0; i < NumParts; ++i) {
- DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
- MachineInstrBuilder MIB =
- MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
- for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
- MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
- }
- MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
- MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
- Observer.changedInstr(MI);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_EXTRACT_VECTOR_ELT:
- case TargetOpcode::G_INSERT_VECTOR_ELT: {
- if (TypeIdx != 2)
- return UnableToLegalize;
- int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
- Observer.changingInstr(MI);
- narrowScalarSrc(MI, NarrowTy, OpIdx);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_ICMP: {
- Register LHS = MI.getOperand(2).getReg();
- LLT SrcTy = MRI.getType(LHS);
- uint64_t SrcSize = SrcTy.getSizeInBits();
- CmpInst::Predicate Pred =
- static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
- // TODO: Handle the non-equality case for weird sizes.
- if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
- return UnableToLegalize;
- LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
- SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
- if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
- LHSLeftoverRegs))
- return UnableToLegalize;
- LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
- SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
- if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
- RHSPartRegs, RHSLeftoverRegs))
- return UnableToLegalize;
- // We now have the LHS and RHS of the compare split into narrow-type
- // registers, plus potentially some leftover type.
- Register Dst = MI.getOperand(0).getReg();
- LLT ResTy = MRI.getType(Dst);
- if (ICmpInst::isEquality(Pred)) {
- // For each part on the LHS and RHS, keep track of the result of XOR-ing
- // them together. For each equal part, the result should be all 0s. For
- // each non-equal part, we'll get at least one 1.
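- // Example (illustrative): a s64 equality compare narrowed to s32 XORs the
- // low halves and the high halves, ORs the two XOR results, and compares
- // that OR against zero with a single s32 G_ICMP.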
- auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
- SmallVector<Register, 4> Xors;
- for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
- auto LHS = std::get<0>(LHSAndRHS);
- auto RHS = std::get<1>(LHSAndRHS);
- auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
- Xors.push_back(Xor);
- }
- // Build a G_XOR for each leftover register. Each G_XOR must be widened
- // to the desired narrow type so that we can OR them together later.
- SmallVector<Register, 4> WidenedXors;
- for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
- auto LHS = std::get<0>(LHSAndRHS);
- auto RHS = std::get<1>(LHSAndRHS);
- auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
- LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
- buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
- /* PadStrategy = */ TargetOpcode::G_ZEXT);
- Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
- }
- // Now, for each part we broke up, we know if they are equal/not equal
- // based off the G_XOR. We can OR these all together and compare against
- // 0 to get the result.
- assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
- auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
- for (unsigned I = 2, E = Xors.size(); I < E; ++I)
- Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
- MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
- } else {
- // TODO: Handle non-power-of-two types.
- assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
- assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
- Register LHSL = LHSPartRegs[0];
- Register LHSH = LHSPartRegs[1];
- Register RHSL = RHSPartRegs[0];
- Register RHSH = RHSPartRegs[1];
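- // The low halves compare as unsigned regardless of the original
- // signedness: only the high halves carry the sign, so the result is the
- // high-part compare unless the high parts are equal, in which case the
- // unsigned low-part compare decides.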
- MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
- MachineInstrBuilder CmpHEQ =
- MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
- MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
- ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
- MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_SEXT_INREG: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- int64_t SizeInBits = MI.getOperand(2).getImm();
- // So long as the new type has more bits than the bits we're extending we
- // don't need to break it apart.
- if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
- Observer.changingInstr(MI);
- // We don't lose any non-extension bits by truncating the src and
- // sign-extending the dst.
- MachineOperand &MO1 = MI.getOperand(1);
- auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
- MO1.setReg(TruncMIB.getReg(0));
- MachineOperand &MO2 = MI.getOperand(0);
- Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildSExt(MO2, DstExt);
- MO2.setReg(DstExt);
- Observer.changedInstr(MI);
- return Legalized;
- }
- // Break it apart. Components below the extension point are unmodified. The
- // component containing the extension point becomes a narrower SEXT_INREG.
- // Components above it are ashr'd from the component containing the
- // extension point.
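- // Example (illustrative): narrowing %0:_(s96) = G_SEXT_INREG %1, 40 with
- // a s32 NarrowTy keeps the lowest s32 part as-is, rewrites the part
- // holding bit 39 as G_SEXT_INREG(..., 8), and produces the top part as
- // G_ASHR of that partial part by 31.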
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
- int NumParts = SizeOp0 / NarrowSize;
- // List the registers where the destination will be scattered.
- SmallVector<Register, 2> DstRegs;
- // List the registers where the source will be split.
- SmallVector<Register, 2> SrcRegs;
- // Create all the temporary registers.
- for (int i = 0; i < NumParts; ++i) {
- Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
- SrcRegs.push_back(SrcReg);
- }
- // Explode the big arguments into smaller chunks.
- MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
- Register AshrCstReg =
- MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
- .getReg(0);
- Register FullExtensionReg = 0;
- Register PartialExtensionReg = 0;
- // Do the operation on each small part.
- for (int i = 0; i < NumParts; ++i) {
- if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
- DstRegs.push_back(SrcRegs[i]);
- else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
- assert(PartialExtensionReg &&
- "Expected to visit partial extension before full");
- if (FullExtensionReg) {
- DstRegs.push_back(FullExtensionReg);
- continue;
- }
- DstRegs.push_back(
- MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
- .getReg(0));
- FullExtensionReg = DstRegs.back();
- } else {
- DstRegs.push_back(
- MIRBuilder
- .buildInstr(
- TargetOpcode::G_SEXT_INREG, {NarrowTy},
- {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
- .getReg(0));
- PartialExtensionReg = DstRegs.back();
- }
- }
- // Gather the destination registers into the final destination.
- Register DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildMerge(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_BSWAP:
- case TargetOpcode::G_BITREVERSE: {
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
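- // Example (illustrative): %0:_(s64) = G_BSWAP %1 with a s32 NarrowTy
- // unmerges %1 into lo/hi, byte-swaps each half, and remerges them with
- // the halves exchanged, since G_BSWAP(hi) becomes the new low part.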
- Observer.changingInstr(MI);
- SmallVector<Register, 2> SrcRegs, DstRegs;
- unsigned NumParts = SizeOp0 / NarrowSize;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
- for (unsigned i = 0; i < NumParts; ++i) {
- auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
- {SrcRegs[NumParts - 1 - i]});
- DstRegs.push_back(DstPart.getReg(0));
- }
- MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
- Observer.changedInstr(MI);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_PTR_ADD:
- case TargetOpcode::G_PTRMASK: {
- if (TypeIdx != 1)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- narrowScalarSrc(MI, NarrowTy, 2);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_FPTOUI:
- case TargetOpcode::G_FPTOSI:
- return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
- case TargetOpcode::G_FPEXT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
- Observer.changedInstr(MI);
- return Legalized;
- }
- }
- Register LegalizerHelper::coerceToScalar(Register Val) {
- LLT Ty = MRI.getType(Val);
- if (Ty.isScalar())
- return Val;
- const DataLayout &DL = MIRBuilder.getDataLayout();
- LLT NewTy = LLT::scalar(Ty.getSizeInBits());
- if (Ty.isPointer()) {
- if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
- return Register();
- return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
- }
- Register NewVal = Val;
- assert(Ty.isVector());
- LLT EltTy = Ty.getElementType();
- if (EltTy.isPointer())
- NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
- return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
- }
- void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
- unsigned OpIdx, unsigned ExtOpcode) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
- MO.setReg(ExtB.getReg(0));
- }
- void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
- unsigned OpIdx) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
- MO.setReg(ExtB.getReg(0));
- }
- void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
- unsigned OpIdx, unsigned TruncOpcode) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- Register DstExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
- MO.setReg(DstExt);
- }
- void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
- unsigned OpIdx, unsigned ExtOpcode) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
- MO.setReg(DstTrunc);
- }
- void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
- unsigned OpIdx) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- Register Dst = MO.getReg();
- Register DstExt = MRI.createGenericVirtualRegister(WideTy);
- MO.setReg(DstExt);
- MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
- }
- void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
- unsigned OpIdx) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- SmallVector<Register, 8> Regs;
- MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
- }
- void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
- MachineOperand &Op = MI.getOperand(OpIdx);
- Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
- }
- void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- Register CastDst = MRI.createGenericVirtualRegister(CastTy);
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildBitcast(MO, CastDst);
- MO.setReg(CastDst);
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
- if (TypeIdx != 1)
- return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- if (DstTy.isVector())
- return UnableToLegalize;
- Register Src1 = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(Src1);
- const int DstSize = DstTy.getSizeInBits();
- const int SrcSize = SrcTy.getSizeInBits();
- const int WideSize = WideTy.getSizeInBits();
- const int NumMerge = (DstSize + WideSize - 1) / WideSize;
- unsigned NumOps = MI.getNumOperands();
- unsigned NumSrc = MI.getNumOperands() - 1;
- unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
- if (WideSize >= DstSize) {
- // Directly pack the bits in the target type.
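- // Example (illustrative): widening
- //   %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4)
- // to s16 zero-extends each s4 part, shifts part I left by 4*I, ORs the
- // shifted parts together, and truncates the s16 result back to s12.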
- Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
- for (unsigned I = 2; I != NumOps; ++I) {
- const unsigned Offset = (I - 1) * PartSize;
- Register SrcReg = MI.getOperand(I).getReg();
- assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
- auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
- Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
- MRI.createGenericVirtualRegister(WideTy);
- auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
- auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
- MIRBuilder.buildOr(NextResult, ResultReg, Shl);
- ResultReg = NextResult;
- }
- if (WideSize > DstSize)
- MIRBuilder.buildTrunc(DstReg, ResultReg);
- else if (DstTy.isPointer())
- MIRBuilder.buildIntToPtr(DstReg, ResultReg);
- MI.eraseFromParent();
- return Legalized;
- }
- // Unmerge the original values to the GCD type, and recombine to the next
- // multiple greater than the original type.
- //
- // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
- // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
- // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
- // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
- // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
- // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
- // %12:_(s12) = G_MERGE_VALUES %10, %11
- //
- // Padding with undef if necessary:
- //
- // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
- // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
- // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
- // %7:_(s2) = G_IMPLICIT_DEF
- // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
- // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
- // %10:_(s12) = G_MERGE_VALUES %8, %9
- const int GCD = greatestCommonDivisor(SrcSize, WideSize);
- LLT GCDTy = LLT::scalar(GCD);
- SmallVector<Register, 8> Parts;
- SmallVector<Register, 8> NewMergeRegs;
- SmallVector<Register, 8> Unmerges;
- LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
- // Decompose the original operands if they don't evenly divide.
- for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
- Register SrcReg = MO.getReg();
- if (GCD == SrcSize) {
- Unmerges.push_back(SrcReg);
- } else {
- auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
- for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
- Unmerges.push_back(Unmerge.getReg(J));
- }
- }
- // Pad with undef to the next size that is a multiple of the requested size.
- if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
- Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
- for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
- Unmerges.push_back(UndefReg);
- }
- const int PartsPerGCD = WideSize / GCD;
- // Build merges of each piece.
- ArrayRef<Register> Slicer(Unmerges);
- for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
- auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
- NewMergeRegs.push_back(Merge.getReg(0));
- }
- // A truncate may be necessary if the requested type doesn't evenly divide the
- // original result type.
- if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
- MIRBuilder.buildMerge(DstReg, NewMergeRegs);
- } else {
- auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
- MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
- }
- MI.eraseFromParent();
- return Legalized;
- }
- Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
- Register WideReg = MRI.createGenericVirtualRegister(WideTy);
- LLT OrigTy = MRI.getType(OrigReg);
- LLT LCMTy = getLCMType(WideTy, OrigTy);
- const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
- const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
- Register UnmergeSrc = WideReg;
- // Create a merge to the LCM type, padding with undef
- // %0:_(<3 x s32>) = G_FOO => <4 x s32>
- // =>
- // %1:_(<4 x s32>) = G_FOO
- // %2:_(<4 x s32>) = G_IMPLICIT_DEF
- // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
- // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
- if (NumMergeParts > 1) {
- Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
- SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
- MergeParts[0] = WideReg;
- UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
- }
- // Unmerge to the original register and pad with dead defs.
- SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
- UnmergeResults[0] = OrigReg;
- for (int I = 1; I != NumUnmergeParts; ++I)
- UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
- MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
- return WideReg;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
- int NumDst = MI.getNumOperands() - 1;
- Register SrcReg = MI.getOperand(NumDst).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy.isVector())
- return UnableToLegalize;
- Register Dst0Reg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(Dst0Reg);
- if (!DstTy.isScalar())
- return UnableToLegalize;
- if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
- if (SrcTy.isPointer()) {
- const DataLayout &DL = MIRBuilder.getDataLayout();
- if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
- LLVM_DEBUG(
- dbgs() << "Not casting non-integral address space integer\n");
- return UnableToLegalize;
- }
- SrcTy = LLT::scalar(SrcTy.getSizeInBits());
- SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
- }
- // Widen SrcTy to WideTy. This does not affect the result, but since the
- // user requested this size, it is probably better handled than SrcTy and
- // should reduce the total number of legalization artifacts.
- if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
- SrcTy = WideTy;
- SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
- }
- // There's no unmerge type to target. Directly extract the bits from the
- // source type.
- unsigned DstSize = DstTy.getSizeInBits();
- MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
- for (int I = 1; I != NumDst; ++I) {
- auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
- auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
- MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- // Extend the source to a wider type.
- LLT LCMTy = getLCMType(SrcTy, WideTy);
- Register WideSrc = SrcReg;
- if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
- // TODO: If this is an integral address space, cast to integer and anyext.
- if (SrcTy.isPointer()) {
- LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
- return UnableToLegalize;
- }
- WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
- }
- auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
- // Create a sequence of unmerges and merges to the original results. Since we
- // may have widened the source, we will need to pad the results with dead defs
- // to cover the source register.
- // e.g. widen s48 to s64:
- // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
- //
- // =>
- // %4:_(s192) = G_ANYEXT %0:_(s96)
- // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
- // ; unpack to GCD type, with extra dead defs
- // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
- // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
- // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
- // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
- // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
- const LLT GCDTy = getGCDType(WideTy, DstTy);
- const int NumUnmerge = Unmerge->getNumOperands() - 1;
- const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
- // Directly unmerge to the destination without going through a GCD type
- // if possible
- if (PartsPerRemerge == 1) {
- const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
- for (int I = 0; I != NumUnmerge; ++I) {
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
- for (int J = 0; J != PartsPerUnmerge; ++J) {
- int Idx = I * PartsPerUnmerge + J;
- if (Idx < NumDst)
- MIB.addDef(MI.getOperand(Idx).getReg());
- else {
- // Create dead def for excess components.
- MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
- }
- }
- MIB.addUse(Unmerge.getReg(I));
- }
- } else {
- SmallVector<Register, 16> Parts;
- for (int J = 0; J != NumUnmerge; ++J)
- extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
- SmallVector<Register, 8> RemergeParts;
- for (int I = 0; I != NumDst; ++I) {
- for (int J = 0; J < PartsPerRemerge; ++J) {
- const int Idx = I * PartsPerRemerge + J;
- RemergeParts.emplace_back(Parts[Idx]);
- }
- MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
- RemergeParts.clear();
- }
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- LLT DstTy = MRI.getType(DstReg);
- unsigned Offset = MI.getOperand(2).getImm();
- if (TypeIdx == 0) {
- if (SrcTy.isVector() || DstTy.isVector())
- return UnableToLegalize;
- SrcOp Src(SrcReg);
- if (SrcTy.isPointer()) {
- // Extracts from pointers can be handled only if they are really just
- // simple integers.
- const DataLayout &DL = MIRBuilder.getDataLayout();
- if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
- return UnableToLegalize;
- LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
- Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
- SrcTy = SrcAsIntTy;
- }
- if (DstTy.isPointer())
- return UnableToLegalize;
- if (Offset == 0) {
- // Avoid a shift in the degenerate case.
- MIRBuilder.buildTrunc(DstReg,
- MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
- MI.eraseFromParent();
- return Legalized;
- }
- // Do a shift in the source type.
- LLT ShiftTy = SrcTy;
- if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
- Src = MIRBuilder.buildAnyExt(WideTy, Src);
- ShiftTy = WideTy;
- }
- auto LShr = MIRBuilder.buildLShr(
- ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
- MIRBuilder.buildTrunc(DstReg, LShr);
- MI.eraseFromParent();
- return Legalized;
- }
- if (SrcTy.isScalar()) {
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- Observer.changedInstr(MI);
- return Legalized;
- }
- if (!SrcTy.isVector())
- return UnableToLegalize;
- if (DstTy != SrcTy.getElementType())
- return UnableToLegalize;
- if (Offset % SrcTy.getScalarSizeInBits() != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
- Offset);
- widenScalarDst(MI, WideTy.getScalarType(), 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
- if (TypeIdx != 0 || WideTy.isVector())
- return UnableToLegalize;
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
- if (TypeIdx == 1)
- return UnableToLegalize; // TODO
- unsigned Opcode;
- unsigned ExtOpcode;
- Optional<Register> CarryIn = None;
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode!");
- case TargetOpcode::G_SADDO:
- Opcode = TargetOpcode::G_ADD;
- ExtOpcode = TargetOpcode::G_SEXT;
- break;
- case TargetOpcode::G_SSUBO:
- Opcode = TargetOpcode::G_SUB;
- ExtOpcode = TargetOpcode::G_SEXT;
- break;
- case TargetOpcode::G_UADDO:
- Opcode = TargetOpcode::G_ADD;
- ExtOpcode = TargetOpcode::G_ZEXT;
- break;
- case TargetOpcode::G_USUBO:
- Opcode = TargetOpcode::G_SUB;
- ExtOpcode = TargetOpcode::G_ZEXT;
- break;
- case TargetOpcode::G_SADDE:
- Opcode = TargetOpcode::G_UADDE;
- ExtOpcode = TargetOpcode::G_SEXT;
- CarryIn = MI.getOperand(4).getReg();
- break;
- case TargetOpcode::G_SSUBE:
- Opcode = TargetOpcode::G_USUBE;
- ExtOpcode = TargetOpcode::G_SEXT;
- CarryIn = MI.getOperand(4).getReg();
- break;
- case TargetOpcode::G_UADDE:
- Opcode = TargetOpcode::G_UADDE;
- ExtOpcode = TargetOpcode::G_ZEXT;
- CarryIn = MI.getOperand(4).getReg();
- break;
- case TargetOpcode::G_USUBE:
- Opcode = TargetOpcode::G_USUBE;
- ExtOpcode = TargetOpcode::G_ZEXT;
- CarryIn = MI.getOperand(4).getReg();
- break;
- }
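- // Example (illustrative): widening %0:_(s8), %c:_(s1) = G_UADDO %a, %b to
- // s32 zero-extends both operands, adds them as s32, and reports overflow
- // when the s32 sum differs from the zero-extension of its low 8 bits.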
- auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
- auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
- // Do the arithmetic in the larger type.
- Register NewOp;
- if (CarryIn) {
- LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
- NewOp = MIRBuilder
- .buildInstr(Opcode, {WideTy, CarryOutTy},
- {LHSExt, RHSExt, *CarryIn})
- .getReg(0);
- } else {
- NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
- }
- LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
- auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
- auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
- // There is no overflow if the ExtOp is the same as NewOp.
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
- // Now trunc the NewOp to the original result.
- MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
- bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
- MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
- MI.getOpcode() == TargetOpcode::G_SSHLSAT;
- bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
- MI.getOpcode() == TargetOpcode::G_USHLSAT;
- // We can convert this to:
- // 1. Any extend iN to iM
- // 2. SHL by M-N
- // 3. [US][ADD|SUB|SHL]SAT
- // 4. L/ASHR by M-N
- //
- // It may be more efficient to lower this to a min and a max operation in
- // the higher precision arithmetic if the promoted operation isn't legal,
- // but this decision is up to the target's lowering request.
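- // Example (illustrative): widening %0:_(s8) = G_SADDSAT %a, %b to s32
- // any-extends both operands, shifts them left by 24 so the saturation
- // boundary lands on bit 31, performs a s32 G_SADDSAT, then arithmetic
- // shift-right by 24 and truncates back to s8.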
- Register DstReg = MI.getOperand(0).getReg();
- unsigned NewBits = WideTy.getScalarSizeInBits();
- unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
- // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
- // must not left shift the RHS to preserve the shift amount.
- auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
- auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
- : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
- auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
- auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
- auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
- auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
- {ShiftL, ShiftR}, MI.getFlags());
- // Use a shift that will preserve the number of sign bits when the trunc is
- // folded away.
- auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
- : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
- MIRBuilder.buildTrunc(DstReg, Result);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
- if (TypeIdx == 1)
- return UnableToLegalize;
- bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
- Register Result = MI.getOperand(0).getReg();
- Register OriginalOverflow = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- LLT SrcTy = MRI.getType(LHS);
- LLT OverflowTy = MRI.getType(OriginalOverflow);
- unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
- // To determine if the result overflowed in the larger type, we extend the
- // input to the larger type, do the multiply (checking if it overflows),
- // then also check the high bits of the result to see if overflow happened
- // there.
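- // Example (illustrative): widening %r:_(s8), %o:_(s1) = G_UMULO %a, %b to
- // s16 zero-extends the operands and multiplies as s16; overflow is
- // reported when the s16 product no longer zero-extends its low 8 bits.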
- unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
- auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
- auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
- auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
- {LeftOperand, RightOperand});
- auto Mul = Mulo->getOperand(0);
- MIRBuilder.buildTrunc(Result, Mul);
- MachineInstrBuilder ExtResult;
- // Overflow occurred if it occurred in the larger type, or if the high part
- // of the result does not zero/sign-extend the low part. Check this second
- // possibility first.
- if (IsSigned) {
- // For signed, overflow occurred when the high part does not sign-extend
- // the low part.
- ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
- } else {
- // Unsigned overflow occurred when the high part does not zero-extend the
- // low part.
- ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
- }
- // Multiplication cannot overflow if WideTy is at least twice the original
- // width, so we don't need to check the overflow result of the larger-type
- // Mulo.
- if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
- auto Overflow =
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
- // Finally check if the multiplication in the larger type itself overflowed.
- MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
- } else {
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
- switch (MI.getOpcode()) {
- default:
- return UnableToLegalize;
- case TargetOpcode::G_ATOMICRMW_XCHG:
- case TargetOpcode::G_ATOMICRMW_ADD:
- case TargetOpcode::G_ATOMICRMW_SUB:
- case TargetOpcode::G_ATOMICRMW_AND:
- case TargetOpcode::G_ATOMICRMW_OR:
- case TargetOpcode::G_ATOMICRMW_XOR:
- case TargetOpcode::G_ATOMICRMW_MIN:
- case TargetOpcode::G_ATOMICRMW_MAX:
- case TargetOpcode::G_ATOMICRMW_UMIN:
- case TargetOpcode::G_ATOMICRMW_UMAX:
- assert(TypeIdx == 0 && "atomicrmw with second scalar type");
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_ATOMIC_CMPXCHG:
- assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
- if (TypeIdx == 0) {
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
- widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- assert(TypeIdx == 1 &&
- "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
- Observer.changingInstr(MI);
- widenScalarDst(MI, WideTy, 1);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_EXTRACT:
- return widenScalarExtract(MI, TypeIdx, WideTy);
- case TargetOpcode::G_INSERT:
- return widenScalarInsert(MI, TypeIdx, WideTy);
- case TargetOpcode::G_MERGE_VALUES:
- return widenScalarMergeValues(MI, TypeIdx, WideTy);
- case TargetOpcode::G_UNMERGE_VALUES:
- return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
- case TargetOpcode::G_SADDO:
- case TargetOpcode::G_SSUBO:
- case TargetOpcode::G_UADDO:
- case TargetOpcode::G_USUBO:
- case TargetOpcode::G_SADDE:
- case TargetOpcode::G_SSUBE:
- case TargetOpcode::G_UADDE:
- case TargetOpcode::G_USUBE:
- return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
- case TargetOpcode::G_UMULO:
- case TargetOpcode::G_SMULO:
- return widenScalarMulo(MI, TypeIdx, WideTy);
- case TargetOpcode::G_SADDSAT:
- case TargetOpcode::G_SSUBSAT:
- case TargetOpcode::G_SSHLSAT:
- case TargetOpcode::G_UADDSAT:
- case TargetOpcode::G_USUBSAT:
- case TargetOpcode::G_USHLSAT:
- return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
- case TargetOpcode::G_CTTZ:
- case TargetOpcode::G_CTTZ_ZERO_UNDEF:
- case TargetOpcode::G_CTLZ:
- case TargetOpcode::G_CTLZ_ZERO_UNDEF:
- case TargetOpcode::G_CTPOP: {
- if (TypeIdx == 0) {
- Observer.changingInstr(MI);
- widenScalarDst(MI, WideTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- Register SrcReg = MI.getOperand(1).getReg();
- // First extend the input.
- unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
- MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
- ? TargetOpcode::G_ANYEXT
- : TargetOpcode::G_ZEXT;
- auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
- LLT CurTy = MRI.getType(SrcReg);
- unsigned NewOpc = MI.getOpcode();
- if (NewOpc == TargetOpcode::G_CTTZ) {
- // The count is the same in the larger type except if the original
- // value was zero. This can be handled by setting the bit just off
- // the top of the original type.
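- // Example (illustrative): widening G_CTTZ s8 to s32 ORs the input with
- // 0x100; a zero input then yields cttz = 8, which is exactly the s8
- // result, so the relaxed G_CTTZ_ZERO_UNDEF can be used on the wide value.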
- auto TopBit =
- APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
- MIBSrc = MIRBuilder.buildOr(
- WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
- // Now we know the operand is non-zero, use the more relaxed opcode.
- NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
- }
- // Perform the operation at the larger size.
- auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
- // This is already the correct result for CTPOP and CTTZs.
- if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
- MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
- // The correct result is NewOp - (Difference in widety and current ty).
- unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
- MIBNewOp = MIRBuilder.buildSub(
- WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
- }
- MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_BSWAP: {
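- // Example (illustrative): widening %0:_(s16) = G_BSWAP %1 to s32
- // any-extends %1, byte-swaps as s32 (the payload ends up in the top two
- // bytes), shifts right by 16, and truncates back to s16.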
- Observer.changingInstr(MI);
- Register DstReg = MI.getOperand(0).getReg();
- Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
- Register DstExt = MRI.createGenericVirtualRegister(WideTy);
- Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- MI.getOperand(0).setReg(DstExt);
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- LLT Ty = MRI.getType(DstReg);
- unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
- MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
- MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
- MIRBuilder.buildTrunc(DstReg, ShrReg);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_BITREVERSE: {
- Observer.changingInstr(MI);
- Register DstReg = MI.getOperand(0).getReg();
- LLT Ty = MRI.getType(DstReg);
- unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
- Register DstExt = MRI.createGenericVirtualRegister(WideTy);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- MI.getOperand(0).setReg(DstExt);
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
- auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
- MIRBuilder.buildTrunc(DstReg, Shift);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_FREEZE:
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_ABS:
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_ADD:
- case TargetOpcode::G_AND:
- case TargetOpcode::G_MUL:
- case TargetOpcode::G_OR:
- case TargetOpcode::G_XOR:
- case TargetOpcode::G_SUB:
- // Perform the operation at the larger width (any extension is fine here,
- // high bits
- // don't affect the result) and then truncate the result back to the
- // original type.
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_SBFX:
- case TargetOpcode::G_UBFX:
- Observer.changingInstr(MI);
- if (TypeIdx == 0) {
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy);
- } else {
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
- }
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_SHL:
- Observer.changingInstr(MI);
- if (TypeIdx == 0) {
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy);
- } else {
- assert(TypeIdx == 1);
- // The "number of bits to shift" operand must preserve its value as an
- // unsigned integer:
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
- }
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_SDIV:
- case TargetOpcode::G_SREM:
- case TargetOpcode::G_SMIN:
- case TargetOpcode::G_SMAX:
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_SDIVREM:
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
- widenScalarDst(MI, WideTy);
- widenScalarDst(MI, WideTy, 1);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR:
- Observer.changingInstr(MI);
- if (TypeIdx == 0) {
- unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
- TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
- widenScalarSrc(MI, WideTy, 1, CvtOp);
- widenScalarDst(MI, WideTy);
- } else {
- assert(TypeIdx == 1);
- // The "number of bits to shift" operand must preserve its value as an
- // unsigned integer:
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
- }
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_UDIV:
- case TargetOpcode::G_UREM:
- case TargetOpcode::G_UMIN:
- case TargetOpcode::G_UMAX:
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_UDIVREM:
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
- widenScalarDst(MI, WideTy);
- widenScalarDst(MI, WideTy, 1);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_SELECT:
- Observer.changingInstr(MI);
- if (TypeIdx == 0) {
- // Perform operation at larger width (any extension is fine here, high
- // bits don't affect the result) and then truncate the result back to the
- // original type.
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy);
- } else {
- bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
- // Explicit extension is required here since high bits affect the result.
- widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
- }
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_FPTOSI:
- case TargetOpcode::G_FPTOUI:
- Observer.changingInstr(MI);
- if (TypeIdx == 0)
- widenScalarDst(MI, WideTy);
- else
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_SITOFP:
- Observer.changingInstr(MI);
- if (TypeIdx == 0)
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- else
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_UITOFP:
- Observer.changingInstr(MI);
- if (TypeIdx == 0)
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- else
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_LOAD:
- case TargetOpcode::G_SEXTLOAD:
- case TargetOpcode::G_ZEXTLOAD:
- Observer.changingInstr(MI);
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_STORE: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- if (!Ty.isScalar())
- return UnableToLegalize;
- Observer.changingInstr(MI);
- unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
- TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
- widenScalarSrc(MI, WideTy, 0, ExtType);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_CONSTANT: {
- MachineOperand &SrcMO = MI.getOperand(1);
- LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
- unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
- MRI.getType(MI.getOperand(0).getReg()));
- assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
- ExtOpc == TargetOpcode::G_ANYEXT) &&
- "Illegal Extend");
- const APInt &SrcVal = SrcMO.getCImm()->getValue();
- const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
- ? SrcVal.sext(WideTy.getSizeInBits())
- : SrcVal.zext(WideTy.getSizeInBits());
- Observer.changingInstr(MI);
- SrcMO.setCImm(ConstantInt::get(Ctx, Val));
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_FCONSTANT: {
- MachineOperand &SrcMO = MI.getOperand(1);
- LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
- APFloat Val = SrcMO.getFPImm()->getValueAPF();
- bool LosesInfo;
- switch (WideTy.getSizeInBits()) {
- case 32:
- Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
- &LosesInfo);
- break;
- case 64:
- Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
- &LosesInfo);
- break;
- default:
- return UnableToLegalize;
- }
- assert(!LosesInfo && "extend should always be lossless");
- Observer.changingInstr(MI);
- SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_IMPLICIT_DEF: {
- Observer.changingInstr(MI);
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_BRCOND:
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_FCMP:
- Observer.changingInstr(MI);
- if (TypeIdx == 0)
- widenScalarDst(MI, WideTy);
- else {
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
- }
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_ICMP:
- Observer.changingInstr(MI);
- if (TypeIdx == 0)
- widenScalarDst(MI, WideTy);
- else {
- unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
- MI.getOperand(1).getPredicate()))
- ? TargetOpcode::G_SEXT
- : TargetOpcode::G_ZEXT;
- widenScalarSrc(MI, WideTy, 2, ExtOpcode);
- widenScalarSrc(MI, WideTy, 3, ExtOpcode);
- }
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_PTR_ADD:
- assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_PHI: {
- assert(TypeIdx == 0 && "Expecting only Idx 0");
- Observer.changingInstr(MI);
- for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
- MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
- MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
- widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
- }
- MachineBasicBlock &MBB = *MI.getParent();
- MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
- if (TypeIdx == 0) {
- Register VecReg = MI.getOperand(1).getReg();
- LLT VecTy = MRI.getType(VecReg);
- Observer.changingInstr(MI);
- widenScalarSrc(
- MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
- TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- if (TypeIdx != 2)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- // TODO: Probably should be zext
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_INSERT_VECTOR_ELT: {
- if (TypeIdx == 1) {
- Observer.changingInstr(MI);
- Register VecReg = MI.getOperand(1).getReg();
- LLT VecTy = MRI.getType(VecReg);
- LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
- widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideVecTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- if (TypeIdx == 2) {
- Observer.changingInstr(MI);
- // TODO: Probably should be zext
- widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
- Observer.changedInstr(MI);
- return Legalized;
- }
- return UnableToLegalize;
- }
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FMAD:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_FCANONICALIZE:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM_IEEE:
- case TargetOpcode::G_FMAXNUM_IEEE:
- case TargetOpcode::G_FMINIMUM:
- case TargetOpcode::G_FMAXIMUM:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_FREM:
- case TargetOpcode::G_FCEIL:
- case TargetOpcode::G_FFLOOR:
- case TargetOpcode::G_FCOS:
- case TargetOpcode::G_FSIN:
- case TargetOpcode::G_FLOG10:
- case TargetOpcode::G_FLOG:
- case TargetOpcode::G_FLOG2:
- case TargetOpcode::G_FRINT:
- case TargetOpcode::G_FNEARBYINT:
- case TargetOpcode::G_FSQRT:
- case TargetOpcode::G_FEXP:
- case TargetOpcode::G_FEXP2:
- case TargetOpcode::G_FPOW:
- case TargetOpcode::G_INTRINSIC_TRUNC:
- case TargetOpcode::G_INTRINSIC_ROUND:
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
- assert(TypeIdx == 0);
- Observer.changingInstr(MI);
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
- widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_FPOWI: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_INTTOPTR:
- if (TypeIdx != 1)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_PTRTOINT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- widenScalarDst(MI, WideTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_BUILD_VECTOR: {
- Observer.changingInstr(MI);
- const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
- for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
- widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
- // Avoid changing the result vector type if the source element type was
- // requested.
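- // e.g. (illustrative, with WideTy = s32 requested for the sources):
- //   %d:_(<4 x s16>) = G_BUILD_VECTOR_TRUNC %a:_(s32), %b:_(s32), ...
- // keeps the <4 x s16> result while only the scalar sources are widened.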
- if (TypeIdx == 1) {
- MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
- } else {
- widenScalarDst(MI, WideTy, 0);
- }
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_SEXT_INREG:
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_PTRMASK: {
- if (TypeIdx != 1)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
- Observer.changedInstr(MI);
- return Legalized;
- }
- }
- }
- static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
- MachineIRBuilder &B, Register Src, LLT Ty) {
- auto Unmerge = B.buildUnmerge(Ty, Src);
- for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
- Pieces.push_back(Unmerge.getReg(I));
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerBitcast(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
- if (SrcTy.isVector()) {
- LLT SrcEltTy = SrcTy.getElementType();
- SmallVector<Register, 8> SrcRegs;
- if (DstTy.isVector()) {
- int NumDstElt = DstTy.getNumElements();
- int NumSrcElt = SrcTy.getNumElements();
- LLT DstEltTy = DstTy.getElementType();
- LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
- LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
- // If there's an element size mismatch, insert intermediate casts to match
- // the result element type.
- if (NumSrcElt < NumDstElt) { // Source element type is larger.
- // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
- //
- // =>
- //
- // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
- // %4:_(<2 x s8>) = G_BITCAST %2
- // %5:_(<2 x s8>) = G_BITCAST %3
- // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
- DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
- SrcPartTy = SrcEltTy;
- } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
- //
- // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
- //
- // =>
- //
- // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
- // %4:_(s16) = G_BITCAST %2
- // %5:_(s16) = G_BITCAST %3
- // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
- SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
- DstCastTy = DstEltTy;
- }
- getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
- for (Register &SrcReg : SrcRegs)
- SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
- } else
- getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
- MIRBuilder.buildMerge(Dst, SrcRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- if (DstTy.isVector()) {
- SmallVector<Register, 8> SrcRegs;
- getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
- MIRBuilder.buildMerge(Dst, SrcRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- return UnableToLegalize;
- }
- /// Figure out the bit offset into a register when coercing a vector index for
- /// the wide element type. This is only for the case when promoting vector to
- /// one with larger elements.
- ///
- /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
- /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
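- ///
- /// A rough worked example (assuming OldEltSize = 32 and NewEltSize = 64, so
- /// Log2EltRatio = 1): %offset_idx = G_AND %idx, 1 selects which s32 half is
- /// wanted, and %offset_bits = G_SHL %offset_idx, 5 converts that to a bit
- /// offset of 0 or 32 within the wide s64 element.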
- static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
- Register Idx,
- unsigned NewEltSize,
- unsigned OldEltSize) {
- const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
- LLT IdxTy = B.getMRI()->getType(Idx);
- // Now figure out the amount we need to shift to get the target bits.
- auto OffsetMask = B.buildConstant(
- IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
- auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
- return B.buildShl(IdxTy, OffsetIdx,
- B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
- }
- /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
- /// is casting to a vector with a smaller element size, perform multiple element
- /// extracts and merge the results. If this is coercing to a vector with larger
- /// elements, index the bitcasted vector and extract the target element with bit
- /// operations. This is intended to force the indexing in the native register
- /// size for architectures that can dynamically index the register file.
- LegalizerHelper::LegalizeResult
- LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
- LLT CastTy) {
- if (TypeIdx != 1)
- return UnableToLegalize;
- Register Dst = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register Idx = MI.getOperand(2).getReg();
- LLT SrcVecTy = MRI.getType(SrcVec);
- LLT IdxTy = MRI.getType(Idx);
- LLT SrcEltTy = SrcVecTy.getElementType();
- unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
- unsigned OldNumElts = SrcVecTy.getNumElements();
- LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
- Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
- const unsigned NewEltSize = NewEltTy.getSizeInBits();
- const unsigned OldEltSize = SrcEltTy.getSizeInBits();
- if (NewNumElts > OldNumElts) {
- // Decreasing the vector element size
- //
- // e.g. i64 = extract_vector_elt x:v2i64, y:i32
- // =>
- // v4i32:castx = bitcast x:v2i64
- //
- // i64 = bitcast
- //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
- //                       (i32 (extract_vector_elt castx, (2 * y + 1))))
- //
- if (NewNumElts % OldNumElts != 0)
- return UnableToLegalize;
- // Type of the intermediate result vector.
- const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
- LLT MidTy =
- LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
- auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
- SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
- auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
- for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
- auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
- auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
- auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
- NewOps[I] = Elt.getReg(0);
- }
- auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
- MIRBuilder.buildBitcast(Dst, NewVec);
- MI.eraseFromParent();
- return Legalized;
- }
- if (NewNumElts < OldNumElts) {
- if (NewEltSize % OldEltSize != 0)
- return UnableToLegalize;
- // This only depends on powers of 2 because we use bit tricks to figure out
- // the bit offset we need to shift to get the target element. A general
- // expansion could emit division/multiply.
- if (!isPowerOf2_32(NewEltSize / OldEltSize))
- return UnableToLegalize;
- // Increasing the vector element size.
- // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
- //
- // =>
- //
- // %cast = G_BITCAST %vec
- // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
- // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
- // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
- // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
- // %elt_bits = G_LSHR %wide_elt, %offset_bits
- // %elt = G_TRUNC %elt_bits
- const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
- auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
- // Divide to get the index in the wider element type.
- auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
- Register WideElt = CastVec;
- if (CastTy.isVector()) {
- WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
- ScaledIdx).getReg(0);
- }
- // Compute the bit offset into the register of the target element.
- Register OffsetBits = getBitcastWiderVectorElementOffset(
- MIRBuilder, Idx, NewEltSize, OldEltSize);
- // Shift the wide element to get the target element.
- auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
- MIRBuilder.buildTrunc(Dst, ExtractedBits);
- MI.eraseFromParent();
- return Legalized;
- }
- return UnableToLegalize;
- }
- /// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
- /// OffsetBits, while preserving the other bits in \p TargetReg.
- ///
- ///   (ZExt(InsertReg) << OffsetBits) |
- ///   (TargetReg & ~(LowBitsMask(InsertReg.size()) << OffsetBits))
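- ///
- /// A worked sketch (assuming an s8 insert into an s32 target at OffsetBits =
- /// 16): the zero-extended value is shifted to bits [23:16], the low-bits mask
- /// 0xFF is shifted to 0x00FF0000 and inverted to 0xFF00FFFF, the target is
- /// ANDed with it to clear those bits, and the two halves are ORed together.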
- static Register buildBitFieldInsert(MachineIRBuilder &B,
- Register TargetReg, Register InsertReg,
- Register OffsetBits) {
- LLT TargetTy = B.getMRI()->getType(TargetReg);
- LLT InsertTy = B.getMRI()->getType(InsertReg);
- auto ZextVal = B.buildZExt(TargetTy, InsertReg);
- auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
- // Produce a bitmask of the value to insert
- auto EltMask = B.buildConstant(
- TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
- InsertTy.getSizeInBits()));
- // Shift it into position
- auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
- auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
- // Clear out the bits in the wide element
- auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
- // The zero-extended, shifted value already has zeros in every other bit
- // position, so OR it into the masked wide element.
- return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
- }
- /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
- /// is increasing the element size, perform the indexing in the target element
- /// type, and use bit operations to insert at the element position. This is
- /// intended for architectures that can dynamically index the register file and
- /// want to force indexing in the native register size.
- LegalizerHelper::LegalizeResult
- LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
- LLT CastTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Register Dst = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register Val = MI.getOperand(2).getReg();
- Register Idx = MI.getOperand(3).getReg();
- LLT VecTy = MRI.getType(Dst);
- LLT IdxTy = MRI.getType(Idx);
- LLT VecEltTy = VecTy.getElementType();
- LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
- const unsigned NewEltSize = NewEltTy.getSizeInBits();
- const unsigned OldEltSize = VecEltTy.getSizeInBits();
- unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
- unsigned OldNumElts = VecTy.getNumElements();
- Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
- if (NewNumElts < OldNumElts) {
- if (NewEltSize % OldEltSize != 0)
- return UnableToLegalize;
- // This only depends on powers of 2 because we use bit tricks to figure out
- // the bit offset we need to shift to get the target element. A general
- // expansion could emit division/multiply.
- if (!isPowerOf2_32(NewEltSize / OldEltSize))
- return UnableToLegalize;
- const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
- auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
- // Divide to get the index in the wider element type.
- auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
- Register ExtractedElt = CastVec;
- if (CastTy.isVector()) {
- ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
- ScaledIdx).getReg(0);
- }
- // Compute the bit offset into the register of the target element.
- Register OffsetBits = getBitcastWiderVectorElementOffset(
- MIRBuilder, Idx, NewEltSize, OldEltSize);
- Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
- Val, OffsetBits);
- if (CastTy.isVector()) {
- InsertedElt = MIRBuilder.buildInsertVectorElement(
- CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
- }
- MIRBuilder.buildBitcast(Dst, InsertedElt);
- MI.eraseFromParent();
- return Legalized;
- }
- return UnableToLegalize;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
- // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
- Register DstReg = LoadMI.getDstReg();
- Register PtrReg = LoadMI.getPointerReg();
- LLT DstTy = MRI.getType(DstReg);
- MachineMemOperand &MMO = LoadMI.getMMO();
- LLT MemTy = MMO.getMemoryType();
- MachineFunction &MF = MIRBuilder.getMF();
- unsigned MemSizeInBits = MemTy.getSizeInBits();
- unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
- if (MemSizeInBits != MemStoreSizeInBits) {
- if (MemTy.isVector())
- return UnableToLegalize;
- // Promote to a byte-sized load if not loading an integral number of
- // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
- LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
- MachineMemOperand *NewMMO =
- MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
- Register LoadReg = DstReg;
- LLT LoadTy = DstTy;
- // If this wasn't already an extending load, we need to widen the result
- // register to avoid creating a load with a narrower result than the source.
- if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
- LoadTy = WideMemTy;
- LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
- }
- if (isa<GSExtLoad>(LoadMI)) {
- auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
- MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
- } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) {
- auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
- // The extra bits are guaranteed to be zero, since we stored them that
- // way. A zext load from Wide thus automatically gives zext from MemVT.
- MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
- } else {
- MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
- }
- if (DstTy != LoadTy)
- MIRBuilder.buildTrunc(DstReg, LoadReg);
- LoadMI.eraseFromParent();
- return Legalized;
- }
- // Big endian lowering not implemented.
- if (MIRBuilder.getDataLayout().isBigEndian())
- return UnableToLegalize;
- // This load needs splitting into power of 2 sized loads.
- //
- // Our strategy here is to generate anyextending loads for the smaller
- // types up to next power-2 result type, and then combine the two larger
- // result values together, before truncating back down to the non-pow-2
- // type.
- // E.g. v1 = i24 load =>
- // v2 = i32 zextload (2 byte)
- // v3 = i32 load (1 byte)
- // v4 = i32 shl v3, 16
- // v5 = i32 or v4, v2
- // v1 = i24 trunc v5
- // By doing this we generate the correct truncate which should get
- // combined away as an artifact with a matching extend.
- uint64_t LargeSplitSize, SmallSplitSize;
- if (!isPowerOf2_32(MemSizeInBits)) {
- // This load needs splitting into power of 2 sized loads.
- LargeSplitSize = PowerOf2Floor(MemSizeInBits);
- SmallSplitSize = MemSizeInBits - LargeSplitSize;
- } else {
- // This is already a power of 2, but we still need to split this in half.
- //
- // Assume we're being asked to decompose an unaligned load.
- // TODO: If this requires multiple splits, handle them all at once.
- auto &Ctx = MF.getFunction().getContext();
- if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
- return UnableToLegalize;
- SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
- }
- if (MemTy.isVector()) {
- // TODO: Handle vector extloads
- if (MemTy != DstTy)
- return UnableToLegalize;
- // TODO: We can do better than scalarizing the vector and at least split it
- // in half.
- return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
- }
- MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
- MachineMemOperand *SmallMMO =
- MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
- LLT PtrTy = MRI.getType(PtrReg);
- unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
- LLT AnyExtTy = LLT::scalar(AnyExtSize);
- auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
- PtrReg, *LargeMMO);
- auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
- LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
- auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
- SmallPtr, *SmallMMO);
- auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
- auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
- if (AnyExtTy == DstTy)
- MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
- else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
- auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
- MIRBuilder.buildTrunc(DstReg, {Or});
- } else {
- assert(DstTy.isPointer() && "expected pointer");
- auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
- // FIXME: We currently consider this to be illegal for non-integral address
- // spaces, but we still need a way to reinterpret the bits.
- MIRBuilder.buildIntToPtr(DstReg, Or);
- }
- LoadMI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
- // Lower a non-power of 2 store into multiple pow-2 stores.
- // E.g. split an i24 store into an i16 store + i8 store.
- // We do this by first extending the stored value to the next largest power
- // of 2 type, and then using truncating stores to store the components.
- // By doing this, as with G_LOAD, we generate an extend that can be
- // artifact-combined away instead of leaving behind extracts.
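- // Illustrative MIR for an s24 store (a sketch; register names are made up):
- //   %ext:_(s32) = G_ANYEXT %val:_(s24)
- //   G_STORE %ext, %ptr            ; truncating s16 store
- //   %hi:_(s32) = G_LSHR %ext, 16
- //   %ptr2:_(p0) = G_PTR_ADD %ptr, 2
- //   G_STORE %hi, %ptr2            ; truncating s8 store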
- Register SrcReg = StoreMI.getValueReg();
- Register PtrReg = StoreMI.getPointerReg();
- LLT SrcTy = MRI.getType(SrcReg);
- MachineFunction &MF = MIRBuilder.getMF();
- MachineMemOperand &MMO = **StoreMI.memoperands_begin();
- LLT MemTy = MMO.getMemoryType();
- unsigned StoreWidth = MemTy.getSizeInBits();
- unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
- if (StoreWidth != StoreSizeInBits) {
- if (SrcTy.isVector())
- return UnableToLegalize;
- // Promote to a byte-sized store with upper bits zero if not
- // storing an integral number of bytes. For example, promote
- // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- LLT WideTy = LLT::scalar(StoreSizeInBits);
- if (StoreSizeInBits > SrcTy.getSizeInBits()) {
- // Avoid creating a store with a narrower source than result.
- SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
- SrcTy = WideTy;
- }
- auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
- MachineMemOperand *NewMMO =
- MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
- MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
- StoreMI.eraseFromParent();
- return Legalized;
- }
- if (MemTy.isVector()) {
- // TODO: Handle vector trunc stores
- if (MemTy != SrcTy)
- return UnableToLegalize;
- // TODO: We can do better than scalarizing the vector and at least split it
- // in half.
- return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
- }
- unsigned MemSizeInBits = MemTy.getSizeInBits();
- uint64_t LargeSplitSize, SmallSplitSize;
- if (!isPowerOf2_32(MemSizeInBits)) {
- LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
- SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
- } else {
- auto &Ctx = MF.getFunction().getContext();
- if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
- return UnableToLegalize; // Don't know what we're being asked to do.
- SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
- }
- // Extend to the next pow-2. If this store was itself the result of lowering,
- // e.g. an s56 store being broken into s32 + s24, we might have a stored type
- // that's wider than the stored size.
- unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
- const LLT NewSrcTy = LLT::scalar(AnyExtSize);
- if (SrcTy.isPointer()) {
- const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
- SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
- }
- auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
- // Obtain the smaller value by shifting away the larger value.
- auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
- auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
- // Generate the PtrAdd and truncating stores.
- LLT PtrTy = MRI.getType(PtrReg);
- auto OffsetCst = MIRBuilder.buildConstant(
- LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
- MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
- MachineMemOperand *SmallMMO =
- MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
- MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
- MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
- StoreMI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
- switch (MI.getOpcode()) {
- case TargetOpcode::G_LOAD: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- MachineMemOperand &MMO = **MI.memoperands_begin();
- // Not sure how to interpret a bitcast of an extending load.
- if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
- return UnableToLegalize;
- Observer.changingInstr(MI);
- bitcastDst(MI, CastTy, 0);
- MMO.setType(CastTy);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_STORE: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- MachineMemOperand &MMO = **MI.memoperands_begin();
- // Not sure how to interpret a bitcast of a truncating store.
- if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
- return UnableToLegalize;
- Observer.changingInstr(MI);
- bitcastSrc(MI, CastTy, 0);
- MMO.setType(CastTy);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_SELECT: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
- LLVM_DEBUG(
- dbgs() << "bitcast action not implemented for vector select\n");
- return UnableToLegalize;
- }
- Observer.changingInstr(MI);
- bitcastSrc(MI, CastTy, 2);
- bitcastSrc(MI, CastTy, 3);
- bitcastDst(MI, CastTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_AND:
- case TargetOpcode::G_OR:
- case TargetOpcode::G_XOR: {
- Observer.changingInstr(MI);
- bitcastSrc(MI, CastTy, 1);
- bitcastSrc(MI, CastTy, 2);
- bitcastDst(MI, CastTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_EXTRACT_VECTOR_ELT:
- return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
- case TargetOpcode::G_INSERT_VECTOR_ELT:
- return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
- default:
- return UnableToLegalize;
- }
- }
- // Legalize an instruction by changing the opcode in place.
- void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
- Observer.changingInstr(MI);
- MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
- Observer.changedInstr(MI);
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
- using namespace TargetOpcode;
- switch(MI.getOpcode()) {
- default:
- return UnableToLegalize;
- case TargetOpcode::G_BITCAST:
- return lowerBitcast(MI);
- case TargetOpcode::G_SREM:
- case TargetOpcode::G_UREM: {
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- auto Quot =
- MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
- {MI.getOperand(1), MI.getOperand(2)});
- auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
- MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_SADDO:
- case TargetOpcode::G_SSUBO:
- return lowerSADDO_SSUBO(MI);
- case TargetOpcode::G_UMULH:
- case TargetOpcode::G_SMULH:
- return lowerSMULH_UMULH(MI);
- case TargetOpcode::G_SMULO:
- case TargetOpcode::G_UMULO: {
- // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
- // result.
- Register Res = MI.getOperand(0).getReg();
- Register Overflow = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- LLT Ty = MRI.getType(Res);
- unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
- ? TargetOpcode::G_SMULH
- : TargetOpcode::G_UMULH;
- Observer.changingInstr(MI);
- const auto &TII = MIRBuilder.getTII();
- MI.setDesc(TII.get(TargetOpcode::G_MUL));
- MI.RemoveOperand(1);
- Observer.changedInstr(MI);
- auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
- auto Zero = MIRBuilder.buildConstant(Ty, 0);
- // Move insert point forward so we can use the Res register if needed.
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- // For *signed* multiply, overflow is detected by checking:
- // (hi != (lo >> bitwidth-1))
- if (Opcode == TargetOpcode::G_SMULH) {
- auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
- auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
- } else {
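- // For *unsigned* multiply, overflow occurs iff the high part is non-zero.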
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
- }
- return Legalized;
- }
- case TargetOpcode::G_FNEG: {
- Register Res = MI.getOperand(0).getReg();
- LLT Ty = MRI.getType(Res);
- // TODO: Handle vector types once we are able to
- // represent them.
- if (Ty.isVector())
- return UnableToLegalize;
- auto SignMask =
- MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
- Register SubByReg = MI.getOperand(1).getReg();
- MIRBuilder.buildXor(Res, SubByReg, SignMask);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_FSUB: {
- Register Res = MI.getOperand(0).getReg();
- LLT Ty = MRI.getType(Res);
- // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
- // First, check if G_FNEG is marked as Lower. If so, we may
- // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
- if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
- return UnableToLegalize;
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- Register Neg = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildFNeg(Neg, RHS);
- MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_FMAD:
- return lowerFMad(MI);
- case TargetOpcode::G_FFLOOR:
- return lowerFFloor(MI);
- case TargetOpcode::G_INTRINSIC_ROUND:
- return lowerIntrinsicRound(MI);
- case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
- // Since round even is the assumed rounding mode for unconstrained FP
- // operations, rint and roundeven are the same operation.
- changeOpcode(MI, TargetOpcode::G_FRINT);
- return Legalized;
- }
- case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
- Register OldValRes = MI.getOperand(0).getReg();
- Register SuccessRes = MI.getOperand(1).getReg();
- Register Addr = MI.getOperand(2).getReg();
- Register CmpVal = MI.getOperand(3).getReg();
- Register NewVal = MI.getOperand(4).getReg();
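- // Lower to a plain G_ATOMIC_CMPXCHG plus an explicit compare of the loaded
- // value against the expected value to recover the success flag.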
- MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
- **MI.memoperands_begin());
- MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_LOAD:
- case TargetOpcode::G_SEXTLOAD:
- case TargetOpcode::G_ZEXTLOAD:
- return lowerLoad(cast<GAnyLoad>(MI));
- case TargetOpcode::G_STORE:
- return lowerStore(cast<GStore>(MI));
- case TargetOpcode::G_CTLZ_ZERO_UNDEF:
- case TargetOpcode::G_CTTZ_ZERO_UNDEF:
- case TargetOpcode::G_CTLZ:
- case TargetOpcode::G_CTTZ:
- case TargetOpcode::G_CTPOP:
- return lowerBitCount(MI);
- case G_UADDO: {
- Register Res = MI.getOperand(0).getReg();
- Register CarryOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- MIRBuilder.buildAdd(Res, LHS, RHS);
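- // The unsigned add wrapped iff the result is less than either operand, so
- // the carry-out is (Res u< RHS).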
- MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
- MI.eraseFromParent();
- return Legalized;
- }
- case G_UADDE: {
- Register Res = MI.getOperand(0).getReg();
- Register CarryOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- Register CarryIn = MI.getOperand(4).getReg();
- LLT Ty = MRI.getType(Res);
- auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
- auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
- MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
- MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
- MI.eraseFromParent();
- return Legalized;
- }
- case G_USUBO: {
- Register Res = MI.getOperand(0).getReg();
- Register BorrowOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- MIRBuilder.buildSub(Res, LHS, RHS);
- MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
- MI.eraseFromParent();
- return Legalized;
- }
- case G_USUBE: {
- Register Res = MI.getOperand(0).getReg();
- Register BorrowOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- Register BorrowIn = MI.getOperand(4).getReg();
- const LLT CondTy = MRI.getType(BorrowOut);
- const LLT Ty = MRI.getType(Res);
- auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
- auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
- MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
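- // The borrow-out is (LHS u< RHS), except when LHS == RHS, where subtracting
- // a set borrow-in wraps; the select below picks BorrowIn for that case.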
- auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
- auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
- MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
- MI.eraseFromParent();
- return Legalized;
- }
- case G_UITOFP:
- return lowerUITOFP(MI);
- case G_SITOFP:
- return lowerSITOFP(MI);
- case G_FPTOUI:
- return lowerFPTOUI(MI);
- case G_FPTOSI:
- return lowerFPTOSI(MI);
- case G_FPTRUNC:
- return lowerFPTRUNC(MI);
- case G_FPOWI:
- return lowerFPOWI(MI);
- case G_SMIN:
- case G_SMAX:
- case G_UMIN:
- case G_UMAX:
- return lowerMinMax(MI);
- case G_FCOPYSIGN:
- return lowerFCopySign(MI);
- case G_FMINNUM:
- case G_FMAXNUM:
- return lowerFMinNumMaxNum(MI);
- case G_MERGE_VALUES:
- return lowerMergeValues(MI);
- case G_UNMERGE_VALUES:
- return lowerUnmergeValues(MI);
- case TargetOpcode::G_SEXT_INREG: {
- assert(MI.getOperand(2).isImm() && "Expected immediate");
- int64_t SizeInBits = MI.getOperand(2).getImm();
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
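- // Lower to a left shift that places the sign bit of the SizeInBits-wide
- // value at the top, followed by an arithmetic shift right by the same
- // amount, e.g. (sketch) sext_inreg(%x:_(s32), 8) => ashr (shl %x, 24), 24.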
- Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
- auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
- MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
- MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
- MI.eraseFromParent();
- return Legalized;
- }
- case G_EXTRACT_VECTOR_ELT:
- case G_INSERT_VECTOR_ELT:
- return lowerExtractInsertVectorElt(MI);
- case G_SHUFFLE_VECTOR:
- return lowerShuffleVector(MI);
- case G_DYN_STACKALLOC:
- return lowerDynStackAlloc(MI);
- case G_EXTRACT:
- return lowerExtract(MI);
- case G_INSERT:
- return lowerInsert(MI);
- case G_BSWAP:
- return lowerBswap(MI);
- case G_BITREVERSE:
- return lowerBitreverse(MI);
- case G_READ_REGISTER:
- case G_WRITE_REGISTER:
- return lowerReadWriteRegister(MI);
- case G_UADDSAT:
- case G_USUBSAT: {
- // Try to make a reasonable guess about which lowering strategy to use. The
- // target can override this with custom lowering and by calling the
- // implementation functions.
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- if (LI.isLegalOrCustom({G_UMIN, Ty}))
- return lowerAddSubSatToMinMax(MI);
- return lowerAddSubSatToAddoSubo(MI);
- }
- case G_SADDSAT:
- case G_SSUBSAT: {
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- // FIXME: It would probably make more sense to see if G_SADDO is preferred,
- // since it's a shorter expansion. However, we would need to figure out the
- // preferred boolean type for the carry out for the query.
- if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
- return lowerAddSubSatToMinMax(MI);
- return lowerAddSubSatToAddoSubo(MI);
- }
- case G_SSHLSAT:
- case G_USHLSAT:
- return lowerShlSat(MI);
- case G_ABS:
- return lowerAbsToAddXor(MI);
- case G_SELECT:
- return lowerSelect(MI);
- case G_SDIVREM:
- case G_UDIVREM:
- return lowerDIVREM(MI);
- case G_FSHL:
- case G_FSHR:
- return lowerFunnelShift(MI);
- case G_ROTL:
- case G_ROTR:
- return lowerRotate(MI);
- case G_MEMSET:
- case G_MEMCPY:
- case G_MEMMOVE:
- return lowerMemCpyFamily(MI);
- case G_MEMCPY_INLINE:
- return lowerMemcpyInline(MI);
- GISEL_VECREDUCE_CASES_NONSEQ
- return lowerVectorReduction(MI);
- }
- }
- Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
- Align MinAlign) const {
- // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
- // datalayout for the preferred alignment. Also there should be a target hook
- // for this to allow targets to reduce the alignment and ignore the
- // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
- // the type.
- return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
- }
- MachineInstrBuilder
- LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
- MachinePointerInfo &PtrInfo) {
- MachineFunction &MF = MIRBuilder.getMF();
- const DataLayout &DL = MIRBuilder.getDataLayout();
- int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
- unsigned AddrSpace = DL.getAllocaAddrSpace();
- LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
- PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
- return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
- }
- static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
- LLT VecTy) {
- int64_t IdxVal;
- if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
- return IdxReg;
- LLT IdxTy = B.getMRI()->getType(IdxReg);
- unsigned NElts = VecTy.getNumElements();
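- // For a power-of-2 element count, clamping is a cheap AND with NElts - 1;
- // otherwise fall back to umin(Idx, NElts - 1).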
- if (isPowerOf2_32(NElts)) {
- APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
- return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
- }
- return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
- .getReg(0);
- }
- Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
- Register Index) {
- LLT EltTy = VecTy.getElementType();
- // Calculate the element offset and add it to the pointer.
- unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
- assert(EltSize * 8 == EltTy.getSizeInBits() &&
- "Converting bits to bytes lost precision");
- Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
- LLT IdxTy = MRI.getType(Index);
- auto Mul = MIRBuilder.buildMul(IdxTy, Index,
- MIRBuilder.buildConstant(IdxTy, EltSize));
- LLT PtrTy = MRI.getType(VecPtr);
- return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
- }
- #ifndef NDEBUG
- /// Check that all vector operands have same number of elements. Other operands
- /// should be listed in NonVecOp.
- static bool hasSameNumEltsOnAllVectorOperands(
- GenericMachineInstr &MI, MachineRegisterInfo &MRI,
- std::initializer_list<unsigned> NonVecOpIndices) {
- if (MI.getNumMemOperands() != 0)
- return false;
- LLT VecTy = MRI.getType(MI.getReg(0));
- if (!VecTy.isVector())
- return false;
- unsigned NumElts = VecTy.getNumElements();
- for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
- MachineOperand &Op = MI.getOperand(OpIdx);
- if (!Op.isReg()) {
- if (!is_contained(NonVecOpIndices, OpIdx))
- return false;
- continue;
- }
- LLT Ty = MRI.getType(Op.getReg());
- if (!Ty.isVector()) {
- if (!is_contained(NonVecOpIndices, OpIdx))
- return false;
- continue;
- }
- if (Ty.getNumElements() != NumElts)
- return false;
- }
- return true;
- }
- #endif
- /// Fill \p DstOps with DstOps that have the same total number of elements as
- /// \p Ty. These DstOps are either scalars (when \p NumElts = 1) or vectors
- /// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
- /// \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
- static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
- unsigned NumElts) {
- LLT LeftoverTy;
- assert(Ty.isVector() && "Expected vector type");
- LLT EltTy = Ty.getElementType();
- LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
- int NumParts, NumLeftover;
- std::tie(NumParts, NumLeftover) =
- getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
- assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
- for (int i = 0; i < NumParts; ++i) {
- DstOps.push_back(NarrowTy);
- }
- if (LeftoverTy.isValid()) {
- assert(NumLeftover == 1 && "expected exactly one leftover");
- DstOps.push_back(LeftoverTy);
- }
- }
- /// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N
- /// SrcOps made from \p Op depending on the operand type.
- static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
- MachineOperand &Op) {
- for (unsigned i = 0; i < N; ++i) {
- if (Op.isReg())
- Ops.push_back(Op.getReg());
- else if (Op.isImm())
- Ops.push_back(Op.getImm());
- else if (Op.isPredicate())
- Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
- else
- llvm_unreachable("Unsupported type");
- }
- }
- // Handle splitting vector operations which need to have the same number of
- // elements in each type index, but each type index may have a different element
- // type.
- //
- // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
- // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
- // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
- //
- // Also handles some irregular breakdown cases, e.g.
- // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
- // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
- // s64 = G_SHL s64, s32
- LegalizerHelper::LegalizeResult
- LegalizerHelper::fewerElementsVectorMultiEltType(
- GenericMachineInstr &MI, unsigned NumElts,
- std::initializer_list<unsigned> NonVecOpIndices) {
- assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
- "Non-compatible opcode or not specified non-vector operands");
- unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
- unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
- unsigned NumDefs = MI.getNumDefs();
- // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
- // Build instructions with DstOps to use instruction found by CSE directly.
- // CSE copies found instruction into given vreg when building with vreg dest.
- SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
- // Output registers will be taken from created instructions.
- SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
- for (unsigned i = 0; i < NumDefs; ++i) {
- makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
- }
- // Split vector input operands into sub-vectors with NumElts elts + Leftover.
- // Operands listed in NonVecOpIndices will be used as is without splitting;
- // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
- // scalar condition (op 1), immediate in sext_inreg (op 2).
- SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
- for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
- ++UseIdx, ++UseNo) {
- if (is_contained(NonVecOpIndices, UseIdx)) {
- broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
- MI.getOperand(UseIdx));
- } else {
- SmallVector<Register, 8> SplitPieces;
- extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
- for (auto Reg : SplitPieces)
- InputOpsPieces[UseNo].push_back(Reg);
- }
- }
- unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
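- // e.g. (per the header comment above) splitting <3 x s64> with NumElts = 2
- // yields one <2 x s64> piece plus one s64 leftover.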
- // Take i-th piece of each input operand split and build sub-vector/scalar
- // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
- for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
- SmallVector<DstOp, 2> Defs;
- for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
- Defs.push_back(OutputOpsPieces[DstNo][i]);
- SmallVector<SrcOp, 3> Uses;
- for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
- Uses.push_back(InputOpsPieces[InputNo][i]);
- auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
- for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
- OutputRegs[DstNo].push_back(I.getReg(DstNo));
- }
- // Merge small outputs into MI's output for each def operand.
- if (NumLeftovers) {
- for (unsigned i = 0; i < NumDefs; ++i)
- mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
- } else {
- for (unsigned i = 0; i < NumDefs; ++i)
- MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
- unsigned NumElts) {
- unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
- unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
- unsigned NumDefs = MI.getNumDefs();
- SmallVector<DstOp, 8> OutputOpsPieces;
- SmallVector<Register, 8> OutputRegs;
- makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
- // Instructions that perform the register split will be inserted in the basic
- // block where the register is defined (the basic block is in the next
- // operand).
- SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
- for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
- UseIdx += 2, ++UseNo) {
- MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
- MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
- extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
- }
- // Build PHIs with fewer elements.
- unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
- MIRBuilder.setInsertPt(*MI.getParent(), MI);
- for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
- auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
- Phi.addDef(
- MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
- OutputRegs.push_back(Phi.getReg(0));
- for (unsigned j = 0; j < NumInputs / 2; ++j) {
- Phi.addUse(InputOpsPieces[j][i]);
- Phi.add(MI.getOperand(1 + j * 2 + 1));
- }
- }
- // Merge small outputs into MI's def.
- if (NumLeftovers) {
- mergeMixedSubvectors(MI.getReg(0), OutputRegs);
- } else {
- MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
- unsigned TypeIdx,
- LLT NarrowTy) {
- const int NumDst = MI.getNumOperands() - 1;
- const Register SrcReg = MI.getOperand(NumDst).getReg();
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- LLT SrcTy = MRI.getType(SrcReg);
- if (TypeIdx != 1 || NarrowTy == DstTy)
- return UnableToLegalize;
- // Requires compatible types. Otherwise SrcReg should have been defined by a
- // merge-like instruction that would get artifact-combined. Most likely the
- // instruction that defines SrcReg has to perform a more/fewer-elements
- // legalization compatible with NarrowTy.
- assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
- assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
- if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
- (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
- return UnableToLegalize;
- // This is most likely DstTy (smaller than register size) packed in SrcTy
- // (larger than register size), and since the unmerge was not combined it will
- // be lowered to bit-sequence extracts from a register. Unpack SrcTy to
- // NarrowTy (register size) pieces first, then unpack each NarrowTy piece to
- // DstTy.
- // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
- //
- // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
- // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
- // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
- auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
- const int NumUnmerge = Unmerge->getNumOperands() - 1;
- const int PartsPerUnmerge = NumDst / NumUnmerge;
- for (int I = 0; I != NumUnmerge; ++I) {
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
- for (int J = 0; J != PartsPerUnmerge; ++J)
- MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
- MIB.addUse(Unmerge.getReg(I));
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
- // Requires compatible types. Otherwise the user of DstReg did not perform an
- // unmerge that should have been artifact-combined. Most likely the
- // instruction that uses DstReg has to do a more/fewer-elements legalization
- // compatible with NarrowTy.
- assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
- assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
- if (NarrowTy == SrcTy)
- return UnableToLegalize;
- // This attempts to lower part of an LCMTy merge/unmerge sequence. Its
- // intended use is for old MIR tests. Since the changes to more/fewer-elements
- // legalization, it should no longer be possible to generate MIR like this
- // when starting from LLVM IR, because the LCMTy approach was replaced with
- // merge/unmerge to vector elements.
- if (TypeIdx == 1) {
- assert(SrcTy.isVector() && "Expected vector types");
- assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
- if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
- (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
- return UnableToLegalize;
- // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
- //
- // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
- // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
- // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
- // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
- // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
- // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
- SmallVector<Register, 8> Elts;
- LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
- for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
- auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
- for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
- Elts.push_back(Unmerge.getReg(j));
- }
- SmallVector<Register, 8> NarrowTyElts;
- unsigned NumNarrowTyElts = NarrowTy.getNumElements();
- unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
- for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
- ++i, Offset += NumNarrowTyElts) {
- ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
- NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
- }
- MIRBuilder.buildMerge(DstReg, NarrowTyElts);
- MI.eraseFromParent();
- return Legalized;
- }
- assert(TypeIdx == 0 && "Bad type index");
- if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
- (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
- return UnableToLegalize;
- // This is most likely SrcTy (smaller than register size) packed in DstTy
- // (larger than register size), and since the merge was not combined it will
- // be lowered to bit-sequence packing into a register. Merge SrcTy to NarrowTy
- // (register size) pieces first, then merge each NarrowTy piece into DstTy.
- // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
- //
- // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
- // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
- // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
- SmallVector<Register, 8> NarrowTyElts;
- unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
- unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
- unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
- for (unsigned i = 0; i < NumParts; ++i) {
- SmallVector<Register, 8> Sources;
- for (unsigned j = 0; j < NumElts; ++j)
- Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
- NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0));
- }
- MIRBuilder.buildMerge(DstReg, NarrowTyElts);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
- unsigned TypeIdx,
- LLT NarrowVecTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register InsertVal;
- bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
- assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
- if (IsInsert)
- InsertVal = MI.getOperand(2).getReg();
- Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
- // TODO: Handle total scalarization case.
- if (!NarrowVecTy.isVector())
- return UnableToLegalize;
- LLT VecTy = MRI.getType(SrcVec);
- // If the index is a constant, we can really break this down as you would
- // expect, and index into the target size pieces.
- int64_t IdxVal;
- auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
- if (MaybeCst) {
- IdxVal = MaybeCst->Value.getSExtValue();
- // Avoid out of bounds indexing the pieces.
- if (IdxVal >= VecTy.getNumElements()) {
- MIRBuilder.buildUndef(DstReg);
- MI.eraseFromParent();
- return Legalized;
- }
- SmallVector<Register, 8> VecParts;
- LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
- // Build a sequence of NarrowTy pieces in VecParts for this operand.
- LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
- TargetOpcode::G_ANYEXT);
- unsigned NewNumElts = NarrowVecTy.getNumElements();
- LLT IdxTy = MRI.getType(Idx);
- int64_t PartIdx = IdxVal / NewNumElts;
- auto NewIdx =
- MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
- if (IsInsert) {
- LLT PartTy = MRI.getType(VecParts[PartIdx]);
- // Use the adjusted index to insert into one of the subvectors.
- auto InsertPart = MIRBuilder.buildInsertVectorElement(
- PartTy, VecParts[PartIdx], InsertVal, NewIdx);
- VecParts[PartIdx] = InsertPart.getReg(0);
- // Recombine the inserted subvector with the others to reform the result
- // vector.
- buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
- } else {
- MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- // With a variable index, we can't perform the operation in a smaller type, so
- // we're forced to expand this.
- //
- // TODO: We could emit a chain of compare/select to figure out which piece to
- // index.
- return lowerExtractInsertVectorElt(MI);
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
- LLT NarrowTy) {
- // FIXME: Don't know how to handle secondary types yet.
- if (TypeIdx != 0)
- return UnableToLegalize;
- // This implementation doesn't work for atomics. Give up instead of doing
- // something invalid.
- if (LdStMI.isAtomic())
- return UnableToLegalize;
- bool IsLoad = isa<GLoad>(LdStMI);
- Register ValReg = LdStMI.getReg(0);
- Register AddrReg = LdStMI.getPointerReg();
- LLT ValTy = MRI.getType(ValReg);
- // FIXME: Do we need a distinct NarrowMemory legalize action?
- if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
- LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
- return UnableToLegalize;
- }
- int NumParts = -1;
- int NumLeftover = -1;
- LLT LeftoverTy;
- SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
- if (IsLoad) {
- std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
- } else {
- if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
- NarrowLeftoverRegs)) {
- NumParts = NarrowRegs.size();
- NumLeftover = NarrowLeftoverRegs.size();
- }
- }
- if (NumParts == -1)
- return UnableToLegalize;
- LLT PtrTy = MRI.getType(AddrReg);
- const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
- unsigned TotalSize = ValTy.getSizeInBits();
- // Split the load/store into PartTy sized pieces starting at Offset. If this
- // is a load, return the new registers in ValRegs. For a store, each element
- // of ValRegs should be PartTy. Returns the next offset that needs to be
- // handled.
- auto MMO = LdStMI.getMMO();
- auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
- unsigned Offset) -> unsigned {
- MachineFunction &MF = MIRBuilder.getMF();
- unsigned PartSize = PartTy.getSizeInBits();
- for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
- Offset += PartSize, ++Idx) {
- unsigned ByteOffset = Offset / 8;
- Register NewAddrReg;
- MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
- MachineMemOperand *NewMMO =
- MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
- if (IsLoad) {
- Register Dst = MRI.createGenericVirtualRegister(PartTy);
- ValRegs.push_back(Dst);
- MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
- } else {
- MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
- }
- }
- return Offset;
- };
- unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
- // Handle the rest of the register if this isn't an even type breakdown.
- if (LeftoverTy.isValid())
- splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
- if (IsLoad) {
- insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
- LeftoverTy, NarrowLeftoverRegs);
- }
- LdStMI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- using namespace TargetOpcode;
- GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
- unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
- switch (MI.getOpcode()) {
- case G_IMPLICIT_DEF:
- case G_TRUNC:
- case G_AND:
- case G_OR:
- case G_XOR:
- case G_ADD:
- case G_SUB:
- case G_MUL:
- case G_PTR_ADD:
- case G_SMULH:
- case G_UMULH:
- case G_FADD:
- case G_FMUL:
- case G_FSUB:
- case G_FNEG:
- case G_FABS:
- case G_FCANONICALIZE:
- case G_FDIV:
- case G_FREM:
- case G_FMA:
- case G_FMAD:
- case G_FPOW:
- case G_FEXP:
- case G_FEXP2:
- case G_FLOG:
- case G_FLOG2:
- case G_FLOG10:
- case G_FNEARBYINT:
- case G_FCEIL:
- case G_FFLOOR:
- case G_FRINT:
- case G_INTRINSIC_ROUND:
- case G_INTRINSIC_ROUNDEVEN:
- case G_INTRINSIC_TRUNC:
- case G_FCOS:
- case G_FSIN:
- case G_FSQRT:
- case G_BSWAP:
- case G_BITREVERSE:
- case G_SDIV:
- case G_UDIV:
- case G_SREM:
- case G_UREM:
- case G_SDIVREM:
- case G_UDIVREM:
- case G_SMIN:
- case G_SMAX:
- case G_UMIN:
- case G_UMAX:
- case G_ABS:
- case G_FMINNUM:
- case G_FMAXNUM:
- case G_FMINNUM_IEEE:
- case G_FMAXNUM_IEEE:
- case G_FMINIMUM:
- case G_FMAXIMUM:
- case G_FSHL:
- case G_FSHR:
- case G_ROTL:
- case G_ROTR:
- case G_FREEZE:
- case G_SADDSAT:
- case G_SSUBSAT:
- case G_UADDSAT:
- case G_USUBSAT:
- case G_UMULO:
- case G_SMULO:
- case G_SHL:
- case G_LSHR:
- case G_ASHR:
- case G_SSHLSAT:
- case G_USHLSAT:
- case G_CTLZ:
- case G_CTLZ_ZERO_UNDEF:
- case G_CTTZ:
- case G_CTTZ_ZERO_UNDEF:
- case G_CTPOP:
- case G_FCOPYSIGN:
- case G_ZEXT:
- case G_SEXT:
- case G_ANYEXT:
- case G_FPEXT:
- case G_FPTRUNC:
- case G_SITOFP:
- case G_UITOFP:
- case G_FPTOSI:
- case G_FPTOUI:
- case G_INTTOPTR:
- case G_PTRTOINT:
- case G_ADDRSPACE_CAST:
- return fewerElementsVectorMultiEltType(GMI, NumElts);
- case G_ICMP:
- case G_FCMP:
- return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
- case G_SELECT:
- if (MRI.getType(MI.getOperand(1).getReg()).isVector())
- return fewerElementsVectorMultiEltType(GMI, NumElts);
- return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
- case G_PHI:
- return fewerElementsVectorPhi(GMI, NumElts);
- case G_UNMERGE_VALUES:
- return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
- case G_BUILD_VECTOR:
- assert(TypeIdx == 0 && "not a vector type index");
- return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
- case G_CONCAT_VECTORS:
- if (TypeIdx != 1) // TODO: This probably does work as expected already.
- return UnableToLegalize;
- return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
- case G_EXTRACT_VECTOR_ELT:
- case G_INSERT_VECTOR_ELT:
- return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
- case G_LOAD:
- case G_STORE:
- return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
- case G_SEXT_INREG:
- return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
- GISEL_VECREDUCE_CASES_NONSEQ
- return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
- case G_SHUFFLE_VECTOR:
- return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
- default:
- return UnableToLegalize;
- }
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
- MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- if (TypeIdx != 0)
- return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
- ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- LLT DstTy = MRI.getType(DstReg);
- LLT Src1Ty = MRI.getType(Src1Reg);
- LLT Src2Ty = MRI.getType(Src2Reg);
- // The shuffle should be canonicalized by now.
- if (DstTy != Src1Ty)
- return UnableToLegalize;
- if (DstTy != Src2Ty)
- return UnableToLegalize;
- if (!isPowerOf2_32(DstTy.getNumElements()))
- return UnableToLegalize;
- // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
- // Further legalization attempts will be needed to split further.
- NarrowTy =
- DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
- unsigned NewElts = NarrowTy.getNumElements();
- SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
- extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
- extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
- Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
- SplitSrc2Regs[1]};
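- // A mask index in [0, NewElts) now refers to Inputs[0] (the low half of the
- // first source), [NewElts, 2*NewElts) to Inputs[1], and so on up to
- // Inputs[3].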
- Register Hi, Lo;
- // If Lo or Hi uses elements from at most two of the four input vectors, then
- // express it as a vector shuffle of those two inputs. Otherwise extract the
- // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
- SmallVector<int, 16> Ops;
- for (unsigned High = 0; High < 2; ++High) {
- Register &Output = High ? Hi : Lo;
- // Build a shuffle mask for the output, discovering on the fly which
- // input vectors to use as shuffle operands (recorded in InputUsed).
- // If building a suitable shuffle vector proves too hard, then bail
- // out with UseBuildVector set.
- unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
- unsigned FirstMaskIdx = High * NewElts;
- bool UseBuildVector = false;
- for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
- // The mask element. This indexes into the input.
- int Idx = Mask[FirstMaskIdx + MaskOffset];
- // The input vector this mask element indexes into.
- unsigned Input = (unsigned)Idx / NewElts;
- if (Input >= array_lengthof(Inputs)) {
- // The mask element does not index into any input vector.
- Ops.push_back(-1);
- continue;
- }
- // Turn the index into an offset from the start of the input vector.
- Idx -= Input * NewElts;
- // Find or create a shuffle vector operand to hold this input.
- unsigned OpNo;
- for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
- if (InputUsed[OpNo] == Input) {
- // This input vector is already an operand.
- break;
- } else if (InputUsed[OpNo] == -1U) {
- // Create a new operand for this input vector.
- InputUsed[OpNo] = Input;
- break;
- }
- }
- if (OpNo >= array_lengthof(InputUsed)) {
- // More than two input vectors used! Give up on trying to create a
- // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
- UseBuildVector = true;
- break;
- }
- // Add the mask index for the new shuffle vector.
- Ops.push_back(Idx + OpNo * NewElts);
- }
- if (UseBuildVector) {
- LLT EltTy = NarrowTy.getElementType();
- SmallVector<Register, 16> SVOps;
- // Extract the input elements by hand.
- for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
- // The mask element. This indexes into the input.
- int Idx = Mask[FirstMaskIdx + MaskOffset];
- // The input vector this mask element indexes into.
- unsigned Input = (unsigned)Idx / NewElts;
- if (Input >= array_lengthof(Inputs)) {
- // The mask element is "undef" or indexes off the end of the input.
- SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
- continue;
- }
- // Turn the index into an offset from the start of the input vector.
- Idx -= Input * NewElts;
- // Extract the vector element by hand.
- SVOps.push_back(MIRBuilder
- .buildExtractVectorElement(
- EltTy, Inputs[Input],
- MIRBuilder.buildConstant(LLT::scalar(32), Idx))
- .getReg(0));
- }
- // Construct the Lo/Hi output using a G_BUILD_VECTOR.
- Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
- } else if (InputUsed[0] == -1U) {
- // No input vectors were used! The result is undefined.
- Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
- } else {
- Register Op0 = Inputs[InputUsed[0]];
- // If only one input was used, use an undefined vector for the other.
- Register Op1 = InputUsed[1] == -1U
- ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
- : Inputs[InputUsed[1]];
- // At least one input vector was used. Create a new shuffle vector.
- Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
- }
- Ops.clear();
- }
- MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
- MI.eraseFromParent();
- return Legalized;
- }
- static unsigned getScalarOpcForReduction(unsigned Opc) {
- unsigned ScalarOpc;
- switch (Opc) {
- case TargetOpcode::G_VECREDUCE_FADD:
- ScalarOpc = TargetOpcode::G_FADD;
- break;
- case TargetOpcode::G_VECREDUCE_FMUL:
- ScalarOpc = TargetOpcode::G_FMUL;
- break;
- case TargetOpcode::G_VECREDUCE_FMAX:
- ScalarOpc = TargetOpcode::G_FMAXNUM;
- break;
- case TargetOpcode::G_VECREDUCE_FMIN:
- ScalarOpc = TargetOpcode::G_FMINNUM;
- break;
- case TargetOpcode::G_VECREDUCE_ADD:
- ScalarOpc = TargetOpcode::G_ADD;
- break;
- case TargetOpcode::G_VECREDUCE_MUL:
- ScalarOpc = TargetOpcode::G_MUL;
- break;
- case TargetOpcode::G_VECREDUCE_AND:
- ScalarOpc = TargetOpcode::G_AND;
- break;
- case TargetOpcode::G_VECREDUCE_OR:
- ScalarOpc = TargetOpcode::G_OR;
- break;
- case TargetOpcode::G_VECREDUCE_XOR:
- ScalarOpc = TargetOpcode::G_XOR;
- break;
- case TargetOpcode::G_VECREDUCE_SMAX:
- ScalarOpc = TargetOpcode::G_SMAX;
- break;
- case TargetOpcode::G_VECREDUCE_SMIN:
- ScalarOpc = TargetOpcode::G_SMIN;
- break;
- case TargetOpcode::G_VECREDUCE_UMAX:
- ScalarOpc = TargetOpcode::G_UMAX;
- break;
- case TargetOpcode::G_VECREDUCE_UMIN:
- ScalarOpc = TargetOpcode::G_UMIN;
- break;
- default:
- llvm_unreachable("Unhandled reduction");
- }
- return ScalarOpc;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
- MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
- unsigned Opc = MI.getOpcode();
- assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
- Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
- "Sequential reductions not expected");
- if (TypeIdx != 1)
- return UnableToLegalize;
- // The semantics of the normal non-sequential reductions allow us to freely
- // re-associate the operation.
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- if (NarrowTy.isVector() &&
- (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
- return UnableToLegalize;
- unsigned ScalarOpc = getScalarOpcForReduction(Opc);
- SmallVector<Register> SplitSrcs;
- // If NarrowTy is a scalar then we're being asked to scalarize.
- const unsigned NumParts =
- NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
- : SrcTy.getNumElements();
- extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
- if (NarrowTy.isScalar()) {
- if (DstTy != NarrowTy)
- return UnableToLegalize; // FIXME: handle implicit extensions.
- if (isPowerOf2_32(NumParts)) {
- // Generate a tree of scalar operations to reduce the critical path.
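- // E.g. with four parts this builds (p0 op p1) op (p2 op p3) rather than the
- // sequential ((p0 op p1) op p2) op p3, halving the dependency depth.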
- SmallVector<Register> PartialResults;
- unsigned NumPartsLeft = NumParts;
- while (NumPartsLeft > 1) {
- for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
- PartialResults.emplace_back(
- MIRBuilder
- .buildInstr(ScalarOpc, {NarrowTy},
- {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
- .getReg(0));
- }
- SplitSrcs = PartialResults;
- PartialResults.clear();
- NumPartsLeft = SplitSrcs.size();
- }
- assert(SplitSrcs.size() == 1);
- MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
- MI.eraseFromParent();
- return Legalized;
- }
- // If we can't generate a tree, then just do sequential operations.
- Register Acc = SplitSrcs[0];
- for (unsigned Idx = 1; Idx < NumParts; ++Idx)
- Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
- .getReg(0);
- MIRBuilder.buildCopy(DstReg, Acc);
- MI.eraseFromParent();
- return Legalized;
- }
- SmallVector<Register> PartialReductions;
- for (unsigned Part = 0; Part < NumParts; ++Part) {
- PartialReductions.push_back(
- MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
- }
- // If the types involved are powers of 2, we can generate intermediate vector
- // ops, before generating a final reduction operation.
- if (isPowerOf2_32(SrcTy.getNumElements()) &&
- isPowerOf2_32(NarrowTy.getNumElements())) {
- return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
- }
- Register Acc = PartialReductions[0];
- for (unsigned Part = 1; Part < NumParts; ++Part) {
- if (Part == NumParts - 1) {
- MIRBuilder.buildInstr(ScalarOpc, {DstReg},
- {Acc, PartialReductions[Part]});
- } else {
- Acc = MIRBuilder
- .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
- .getReg(0);
- }
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
- LLT SrcTy, LLT NarrowTy,
- unsigned ScalarOpc) {
- SmallVector<Register> SplitSrcs;
- // Split the sources into NarrowTy size pieces.
- extractParts(SrcReg, NarrowTy,
- SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
- // We're going to do a tree reduction using vector operations until we have
- // one NarrowTy size value left.
- while (SplitSrcs.size() > 1) {
- SmallVector<Register> PartialRdxs;
- for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
- Register LHS = SplitSrcs[Idx];
- Register RHS = SplitSrcs[Idx + 1];
- // Create the intermediate vector op.
- Register Res =
- MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
- PartialRdxs.push_back(Res);
- }
- SplitSrcs = std::move(PartialRdxs);
- }
- // Finally generate the requested NarrowTy based reduction.
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(SplitSrcs[0]);
- Observer.changedInstr(MI);
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
- const LLT HalfTy, const LLT AmtTy) {
- Register InL = MRI.createGenericVirtualRegister(HalfTy);
- Register InH = MRI.createGenericVirtualRegister(HalfTy);
- MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
- if (Amt.isZero()) {
- MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
- MI.eraseFromParent();
- return Legalized;
- }
- LLT NVT = HalfTy;
- unsigned NVTBits = HalfTy.getSizeInBits();
- unsigned VTBits = 2 * NVTBits;
- SrcOp Lo(Register(0)), Hi(Register(0));
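- // For each opcode there are four cases: the amount shifts out the entire
- // value, it shifts out more than one half, it equals the half width (a pure
- // half move), or bits must be moved between the two halves.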
- if (MI.getOpcode() == TargetOpcode::G_SHL) {
- if (Amt.ugt(VTBits)) {
- Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
- } else if (Amt.ugt(NVTBits)) {
- Lo = MIRBuilder.buildConstant(NVT, 0);
- Hi = MIRBuilder.buildShl(NVT, InL,
- MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
- } else if (Amt == NVTBits) {
- Lo = MIRBuilder.buildConstant(NVT, 0);
- Hi = InL;
- } else {
- Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
- auto OrLHS =
- MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
- auto OrRHS = MIRBuilder.buildLShr(
- NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
- Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
- }
- } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
- if (Amt.ugt(VTBits)) {
- Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
- } else if (Amt.ugt(NVTBits)) {
- Lo = MIRBuilder.buildLShr(NVT, InH,
- MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
- Hi = MIRBuilder.buildConstant(NVT, 0);
- } else if (Amt == NVTBits) {
- Lo = InH;
- Hi = MIRBuilder.buildConstant(NVT, 0);
- } else {
- auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
- auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
- auto OrRHS = MIRBuilder.buildShl(
- NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
- Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
- Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
- }
- } else {
- if (Amt.ugt(VTBits)) {
- Hi = Lo = MIRBuilder.buildAShr(
- NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
- } else if (Amt.ugt(NVTBits)) {
- Lo = MIRBuilder.buildAShr(NVT, InH,
- MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
- Hi = MIRBuilder.buildAShr(NVT, InH,
- MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
- } else if (Amt == NVTBits) {
- Lo = InH;
- Hi = MIRBuilder.buildAShr(NVT, InH,
- MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
- } else {
- auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
- auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
- auto OrRHS = MIRBuilder.buildShl(
- NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
- Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
- Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
- }
- }
- MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
- MI.eraseFromParent();
- return Legalized;
- }
- // TODO: Optimize if constant shift amount.
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
- LLT RequestedTy) {
- if (TypeIdx == 1) {
- Observer.changingInstr(MI);
- narrowScalarSrc(MI, RequestedTy, 2);
- Observer.changedInstr(MI);
- return Legalized;
- }
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- if (DstTy.isVector())
- return UnableToLegalize;
- Register Amt = MI.getOperand(2).getReg();
- LLT ShiftAmtTy = MRI.getType(Amt);
- const unsigned DstEltSize = DstTy.getScalarSizeInBits();
- if (DstEltSize % 2 != 0)
- return UnableToLegalize;
- // Ignore the input type. We can only go to exactly half the size of the
- // input. If that isn't small enough, the resulting pieces will be further
- // legalized.
- const unsigned NewBitSize = DstEltSize / 2;
- const LLT HalfTy = LLT::scalar(NewBitSize);
- const LLT CondTy = LLT::scalar(1);
- if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
- return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
- ShiftAmtTy);
- }
- // TODO: Expand with known bits.
- // Handle the fully general expansion by an unknown amount.
- auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
- Register InL = MRI.createGenericVirtualRegister(HalfTy);
- Register InH = MRI.createGenericVirtualRegister(HalfTy);
- MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
- auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
- auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
- auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
- auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
- auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
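- // A zero amount needs its own select: the short path would otherwise shift
- // the opposite half by AmtLack == NewBitSize, which is out of range for the
- // half-sized pieces.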
- Register ResultRegs[2];
- switch (MI.getOpcode()) {
- case TargetOpcode::G_SHL: {
- // Short: ShAmt < NewBitSize
- auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
- auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
- auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
- auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
- // Long: ShAmt >= NewBitSize
- auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
- auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
- auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
- auto Hi = MIRBuilder.buildSelect(
- HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
- ResultRegs[0] = Lo.getReg(0);
- ResultRegs[1] = Hi.getReg(0);
- break;
- }
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_ASHR: {
- // Short: ShAmt < NewBitSize
- auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
- auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
- auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
- auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
- // Long: ShAmt >= NewBitSize
- MachineInstrBuilder HiL;
- if (MI.getOpcode() == TargetOpcode::G_LSHR) {
- HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
- } else {
- auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
- HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
- }
- auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
- {InH, AmtExcess}); // Lo from Hi part.
- auto Lo = MIRBuilder.buildSelect(
- HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
- auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
- ResultRegs[0] = Lo.getReg(0);
- ResultRegs[1] = Hi.getReg(0);
- break;
- }
- default:
- llvm_unreachable("not a shift");
- }
- MIRBuilder.buildMerge(DstReg, ResultRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
- LLT MoreTy) {
- assert(TypeIdx == 0 && "Expecting only Idx 0");
- Observer.changingInstr(MI);
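- // Widen each incoming value in its predecessor block, just before that
- // block's terminator; the widened PHI result is narrowed back to the
- // original type after the PHI nodes of this block.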
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
- MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
- MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
- moreElementsVectorSrc(MI, MoreTy, I);
- }
- MachineBasicBlock &MBB = *MI.getParent();
- MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
- moreElementsVectorDst(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
- LLT MoreTy) {
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
- case TargetOpcode::G_IMPLICIT_DEF:
- case TargetOpcode::G_LOAD: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- moreElementsVectorDst(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_STORE:
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- moreElementsVectorSrc(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_AND:
- case TargetOpcode::G_OR:
- case TargetOpcode::G_XOR:
- case TargetOpcode::G_ADD:
- case TargetOpcode::G_SUB:
- case TargetOpcode::G_MUL:
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_UADDSAT:
- case TargetOpcode::G_USUBSAT:
- case TargetOpcode::G_SADDSAT:
- case TargetOpcode::G_SSUBSAT:
- case TargetOpcode::G_SMIN:
- case TargetOpcode::G_SMAX:
- case TargetOpcode::G_UMIN:
- case TargetOpcode::G_UMAX:
- case TargetOpcode::G_FMINNUM:
- case TargetOpcode::G_FMAXNUM:
- case TargetOpcode::G_FMINNUM_IEEE:
- case TargetOpcode::G_FMAXNUM_IEEE:
- case TargetOpcode::G_FMINIMUM:
- case TargetOpcode::G_FMAXIMUM: {
- Observer.changingInstr(MI);
- moreElementsVectorSrc(MI, MoreTy, 1);
- moreElementsVectorSrc(MI, MoreTy, 2);
- moreElementsVectorDst(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FSHR:
- case TargetOpcode::G_FSHL: {
- Observer.changingInstr(MI);
- moreElementsVectorSrc(MI, MoreTy, 1);
- moreElementsVectorSrc(MI, MoreTy, 2);
- moreElementsVectorSrc(MI, MoreTy, 3);
- moreElementsVectorDst(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_EXTRACT:
- if (TypeIdx != 1)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- moreElementsVectorSrc(MI, MoreTy, 1);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_INSERT:
- case TargetOpcode::G_FREEZE:
- case TargetOpcode::G_FNEG:
- case TargetOpcode::G_FABS:
- case TargetOpcode::G_BSWAP:
- case TargetOpcode::G_FCANONICALIZE:
- case TargetOpcode::G_SEXT_INREG:
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- moreElementsVectorSrc(MI, MoreTy, 1);
- moreElementsVectorDst(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_SELECT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- if (MRI.getType(MI.getOperand(1).getReg()).isVector())
- return UnableToLegalize;
- Observer.changingInstr(MI);
- moreElementsVectorSrc(MI, MoreTy, 2);
- moreElementsVectorSrc(MI, MoreTy, 3);
- moreElementsVectorDst(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- case TargetOpcode::G_UNMERGE_VALUES:
- return UnableToLegalize;
- case TargetOpcode::G_PHI:
- return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
- case TargetOpcode::G_SHUFFLE_VECTOR:
- return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
- case TargetOpcode::G_BUILD_VECTOR: {
- SmallVector<SrcOp, 8> Elts;
- for (auto Op : MI.uses()) {
- Elts.push_back(Op.getReg());
- }
- for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
- Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
- }
- MIRBuilder.buildDeleteTrailingVectorElements(
- MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_TRUNC: {
- Observer.changingInstr(MI);
- moreElementsVectorSrc(MI, MoreTy, 1);
- moreElementsVectorDst(MI, MoreTy, 0);
- Observer.changedInstr(MI);
- return Legalized;
- }
- default:
- return UnableToLegalize;
- }
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
- unsigned int TypeIdx, LLT MoreTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
- ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- LLT DstTy = MRI.getType(DstReg);
- LLT Src1Ty = MRI.getType(Src1Reg);
- LLT Src2Ty = MRI.getType(Src2Reg);
- unsigned NumElts = DstTy.getNumElements();
- unsigned WidenNumElts = MoreTy.getNumElements();
- // Expect a canonicalized shuffle.
- if (DstTy != Src1Ty || DstTy != Src2Ty)
- return UnableToLegalize;
- moreElementsVectorSrc(MI, MoreTy, 1);
- moreElementsVectorSrc(MI, MoreTy, 2);
- // Adjust mask based on new input vector length.
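- // E.g. widening <2 x s32> sources to <4 x s32>: a mask entry of 3 (element
- // 1 of the second input) becomes 3 - 2 + 4 = 5, since the second input now
- // starts at index 4; the extra output lanes get undef (-1) entries.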
- SmallVector<int, 16> NewMask;
- for (unsigned I = 0; I != NumElts; ++I) {
- int Idx = Mask[I];
- if (Idx < static_cast<int>(NumElts))
- NewMask.push_back(Idx);
- else
- NewMask.push_back(Idx - NumElts + WidenNumElts);
- }
- for (unsigned I = NumElts; I != WidenNumElts; ++I)
- NewMask.push_back(-1);
- moreElementsVectorDst(MI, MoreTy, 0);
- MIRBuilder.setInstrAndDebugLoc(MI);
- MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
- MI.getOperand(1).getReg(),
- MI.getOperand(2).getReg(), NewMask);
- MI.eraseFromParent();
- return Legalized;
- }
- void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
- ArrayRef<Register> Src1Regs,
- ArrayRef<Register> Src2Regs,
- LLT NarrowTy) {
- MachineIRBuilder &B = MIRBuilder;
- unsigned SrcParts = Src1Regs.size();
- unsigned DstParts = DstRegs.size();
- unsigned DstIdx = 0; // Low bits of the result.
- Register FactorSum =
- B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
- DstRegs[DstIdx] = FactorSum;
- unsigned CarrySumPrevDstIdx;
- SmallVector<Register, 4> Factors;
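- // Grade-school multiplication on NarrowTy-sized digits: column DstIdx sums
- // the low halves of Src1[DstIdx - i] * Src2[i], the high halves of the
- // previous column's products, and the carries accumulated so far.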
- for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
- // Collect low parts of muls for DstIdx.
- for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
- i <= std::min(DstIdx, SrcParts - 1); ++i) {
- MachineInstrBuilder Mul =
- B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
- Factors.push_back(Mul.getReg(0));
- }
- // Collect high parts of muls from previous DstIdx.
- for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
- i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
- MachineInstrBuilder Umulh =
- B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
- Factors.push_back(Umulh.getReg(0));
- }
- // Add CarrySum from additions calculated for previous DstIdx.
- if (DstIdx != 1) {
- Factors.push_back(CarrySumPrevDstIdx);
- }
- Register CarrySum;
- // Add all factors and accumulate all carries into CarrySum.
- if (DstIdx != DstParts - 1) {
- MachineInstrBuilder Uaddo =
- B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
- FactorSum = Uaddo.getReg(0);
- CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
- for (unsigned i = 2; i < Factors.size(); ++i) {
- MachineInstrBuilder Uaddo =
- B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
- FactorSum = Uaddo.getReg(0);
- MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
- CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
- }
- } else {
- // Since the value for the next index is not calculated, neither is CarrySum.
- FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
- for (unsigned i = 2; i < Factors.size(); ++i)
- FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
- }
- CarrySumPrevDstIdx = CarrySum;
- DstRegs[DstIdx] = FactorSum;
- Factors.clear();
- }
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstType = MRI.getType(DstReg);
- // FIXME: add support for vector types
- if (DstType.isVector())
- return UnableToLegalize;
- unsigned Opcode = MI.getOpcode();
- unsigned OpO, OpE, OpF;
- switch (Opcode) {
- case TargetOpcode::G_SADDO:
- case TargetOpcode::G_SADDE:
- case TargetOpcode::G_UADDO:
- case TargetOpcode::G_UADDE:
- case TargetOpcode::G_ADD:
- OpO = TargetOpcode::G_UADDO;
- OpE = TargetOpcode::G_UADDE;
- OpF = TargetOpcode::G_UADDE;
- if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
- OpF = TargetOpcode::G_SADDE;
- break;
- case TargetOpcode::G_SSUBO:
- case TargetOpcode::G_SSUBE:
- case TargetOpcode::G_USUBO:
- case TargetOpcode::G_USUBE:
- case TargetOpcode::G_SUB:
- OpO = TargetOpcode::G_USUBO;
- OpE = TargetOpcode::G_USUBE;
- OpF = TargetOpcode::G_USUBE;
- if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
- OpF = TargetOpcode::G_SSUBE;
- break;
- default:
- llvm_unreachable("Unexpected add/sub opcode!");
- }
- // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
- unsigned NumDefs = MI.getNumExplicitDefs();
- Register Src1 = MI.getOperand(NumDefs).getReg();
- Register Src2 = MI.getOperand(NumDefs + 1).getReg();
- Register CarryDst, CarryIn;
- if (NumDefs == 2)
- CarryDst = MI.getOperand(1).getReg();
- if (MI.getNumOperands() == NumDefs + 3)
- CarryIn = MI.getOperand(NumDefs + 2).getReg();
- LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
- LLT LeftoverTy, DummyTy;
- SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
- extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
- extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);
- int NarrowParts = Src1Regs.size();
- for (int I = 0, E = Src1Left.size(); I != E; ++I) {
- Src1Regs.push_back(Src1Left[I]);
- Src2Regs.push_back(Src2Left[I]);
- }
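- // E.g. narrowing an s96 G_ADD with NarrowTy = s32 emits a G_UADDO for the
- // low piece followed by two G_UADDEs that thread the carry through.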
- DstRegs.reserve(Src1Regs.size());
- for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
- Register DstReg =
- MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
- Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
- // Forward the final carry-out to the destination register
- if (i == e - 1 && CarryDst)
- CarryOut = CarryDst;
- if (!CarryIn) {
- MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
- {Src1Regs[i], Src2Regs[i]});
- } else if (i == e - 1) {
- MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
- {Src1Regs[i], Src2Regs[i], CarryIn});
- } else {
- MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
- {Src1Regs[i], Src2Regs[i], CarryIn});
- }
- DstRegs.push_back(DstReg);
- CarryIn = CarryOut;
- }
- insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
- makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
- makeArrayRef(DstRegs).drop_front(NarrowParts));
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1 = MI.getOperand(1).getReg();
- Register Src2 = MI.getOperand(2).getReg();
- LLT Ty = MRI.getType(DstReg);
- if (Ty.isVector())
- return UnableToLegalize;
- unsigned Size = Ty.getSizeInBits();
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- if (Size % NarrowSize != 0)
- return UnableToLegalize;
- unsigned NumParts = Size / NarrowSize;
- bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
- unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
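- // For a high multiply, compute the full double-width product and keep only
- // the high NumParts registers below.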
- SmallVector<Register, 2> Src1Parts, Src2Parts;
- SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
- extractParts(Src1, NarrowTy, NumParts, Src1Parts);
- extractParts(Src2, NarrowTy, NumParts, Src2Parts);
- multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
- // Take only the high half of the registers if this is a high multiply.
- ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
- MIRBuilder.buildMerge(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
- bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
- Register Src = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(Src);
- // If all finite floats fit into the narrowed integer type, we can just swap
- // out the result type. This is practically only useful for conversions from
- // half to at least 16 bits, so just handle the one case.
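- // All finite half values lie within [-65504, 65504], so the result fits in
- // 16 bits unsigned or 17 bits signed.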
- if (SrcTy.getScalarType() != LLT::scalar(16) ||
- NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
- return UnableToLegalize;
- Observer.changingInstr(MI);
- narrowScalarDst(MI, NarrowTy, 0,
- IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
- Observer.changedInstr(MI);
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 1)
- return UnableToLegalize;
- uint64_t NarrowSize = NarrowTy.getSizeInBits();
- int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- // FIXME: add support for when SizeOp1 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp1 % NarrowSize != 0)
- return UnableToLegalize;
- int NumParts = SizeOp1 / NarrowSize;
- SmallVector<Register, 2> SrcRegs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
- Register OpReg = MI.getOperand(0).getReg();
- uint64_t OpStart = MI.getOperand(2).getImm();
- uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
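- // E.g. extracting bits [40, 64) from an s96 value split into s32 parts
- // reads a 24-bit segment at offset 8 within part 1.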
- for (int i = 0; i < NumParts; ++i) {
- unsigned SrcStart = i * NarrowSize;
- if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
- // No part of the extract uses this subregister, ignore it.
- continue;
- } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
- // The entire subregister is extracted, forward the value.
- DstRegs.push_back(SrcRegs[i]);
- continue;
- }
- // Compute where the requested range and this subregister overlap: the
- // offset of the overlap within the subregister and its size in bits.
- int64_t ExtractOffset;
- uint64_t SegSize;
- if (OpStart < SrcStart) {
- ExtractOffset = 0;
- SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
- } else {
- ExtractOffset = OpStart - SrcStart;
- SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
- }
- Register SegReg = SrcRegs[i];
- if (ExtractOffset != 0 || SegSize != NarrowSize) {
- // A genuine extract is needed.
- SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
- MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
- }
- DstRegs.push_back(SegReg);
- }
- Register DstReg = MI.getOperand(0).getReg();
- if (MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else if (DstRegs.size() > 1)
- MIRBuilder.buildMerge(DstReg, DstRegs);
- else
- MIRBuilder.buildCopy(DstReg, DstRegs[0]);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- // FIXME: Don't know how to handle secondary types yet.
- if (TypeIdx != 0)
- return UnableToLegalize;
- SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
- LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
- LLT LeftoverTy;
- extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
- LeftoverRegs);
- for (Register Reg : LeftoverRegs)
- SrcRegs.push_back(Reg);
- uint64_t NarrowSize = NarrowTy.getSizeInBits();
- Register OpReg = MI.getOperand(2).getReg();
- uint64_t OpStart = MI.getOperand(3).getImm();
- uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
- for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
- unsigned DstStart = I * NarrowSize;
- if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
- // The entire subregister is defined by this insert, forward the new
- // value.
- DstRegs.push_back(OpReg);
- continue;
- }
- Register SrcReg = SrcRegs[I];
- if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
- // The leftover reg is smaller than NarrowTy, so we need to extend it.
- SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
- }
- if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
- // No part of the insert affects this subregister, forward the original.
- DstRegs.push_back(SrcReg);
- continue;
- }
- // Compute where the inserted value and this subregister overlap: the offset
- // into OpReg to extract from, the offset in the subregister to insert at,
- // and the size of the overlapping segment.
- int64_t ExtractOffset, InsertOffset;
- uint64_t SegSize;
- if (OpStart < DstStart) {
- InsertOffset = 0;
- ExtractOffset = DstStart - OpStart;
- SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
- } else {
- InsertOffset = OpStart - DstStart;
- ExtractOffset = 0;
- SegSize =
- std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
- }
- Register SegReg = OpReg;
- if (ExtractOffset != 0 || SegSize != OpSize) {
- // A genuine extract is needed.
- SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
- MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
- }
- Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
- DstRegs.push_back(DstReg);
- }
- uint64_t WideSize = DstRegs.size() * NarrowSize;
- Register DstReg = MI.getOperand(0).getReg();
- if (WideSize > RegTy.getSizeInBits()) {
- Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
- MIRBuilder.buildMerge(MergeReg, DstRegs);
- MIRBuilder.buildTrunc(DstReg, MergeReg);
- } else
- MIRBuilder.buildMerge(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- assert(MI.getNumOperands() == 3 && TypeIdx == 0);
- SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
- SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
- SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
- LLT LeftoverTy;
- if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
- Src0Regs, Src0LeftoverRegs))
- return UnableToLegalize;
- LLT Unused;
- if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
- Src1Regs, Src1LeftoverRegs))
- llvm_unreachable("inconsistent extractParts result");
- for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
- auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
- {Src0Regs[I], Src1Regs[I]});
- DstRegs.push_back(Inst.getReg(0));
- }
- for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
- auto Inst = MIRBuilder.buildInstr(
- MI.getOpcode(),
- {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
- DstLeftoverRegs.push_back(Inst.getReg(0));
- }
- insertParts(DstReg, DstTy, NarrowTy, DstRegs,
- LeftoverTy, DstLeftoverRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- if (DstTy.isVector())
- return UnableToLegalize;
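- // Split the source into GCD-typed pieces, pad them up to a merge-friendly
- // LCM type using this instruction's extension opcode, and remerge the
- // result into DstReg.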
- SmallVector<Register, 8> Parts;
- LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
- LLT LCMTy =
- buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Register CondReg = MI.getOperand(1).getReg();
- LLT CondTy = MRI.getType(CondReg);
- if (CondTy.isVector()) // TODO: Handle vselect
- return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
- SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
- SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
- LLT LeftoverTy;
- if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
- Src1Regs, Src1LeftoverRegs))
- return UnableToLegalize;
- LLT Unused;
- if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
- Src2Regs, Src2LeftoverRegs))
- llvm_unreachable("inconsistent extractParts result");
- for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
- auto Select = MIRBuilder.buildSelect(NarrowTy,
- CondReg, Src1Regs[I], Src2Regs[I]);
- DstRegs.push_back(Select.getReg(0));
- }
- for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
- auto Select = MIRBuilder.buildSelect(
- LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
- DstLeftoverRegs.push_back(Select.getReg(0));
- }
- insertParts(DstReg, DstTy, NarrowTy, DstRegs,
- LeftoverTy, DstLeftoverRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 1)
- return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
- const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
- MachineIRBuilder &B = MIRBuilder;
- auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
- // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
- auto C_0 = B.buildConstant(NarrowTy, 0);
- auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
- UnmergeSrc.getReg(1), C_0);
- auto LoCTLZ = IsUndef ?
- B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
- B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
- auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
- auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
- auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
- B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
- MI.eraseFromParent();
- return Legalized;
- }
- return UnableToLegalize;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 1)
- return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
- const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
- MachineIRBuilder &B = MIRBuilder;
- auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
- // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
- auto C_0 = B.buildConstant(NarrowTy, 0);
- auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
- UnmergeSrc.getReg(0), C_0);
- auto HiCTTZ = IsUndef ?
- B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
- B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
- auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
- auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
- auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
- B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
- MI.eraseFromParent();
- return Legalized;
- }
- return UnableToLegalize;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- if (TypeIdx != 1)
- return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
- auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
- auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
- auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
- MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
- MI.eraseFromParent();
- return Legalized;
- }
- return UnableToLegalize;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerBitCount(MachineInstr &MI) {
- unsigned Opc = MI.getOpcode();
- const auto &TII = MIRBuilder.getTII();
- auto isSupported = [this](const LegalityQuery &Q) {
- auto QAction = LI.getAction(Q).Action;
- return QAction == Legal || QAction == Libcall || QAction == Custom;
- };
- switch (Opc) {
- default:
- return UnableToLegalize;
- case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
- // This trivially expands to CTLZ.
- Observer.changingInstr(MI);
- MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_CTLZ: {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- unsigned Len = SrcTy.getSizeInBits();
- if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
- // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
- auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
- auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
- auto ICmp = MIRBuilder.buildICmp(
- CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
- auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
- MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
- MI.eraseFromParent();
- return Legalized;
- }
- // for now, we do this:
- // NewLen = NextPowerOf2(Len);
- // x = x | (x >> 1);
- // x = x | (x >> 2);
- // ...
- // x = x | (x >> 16);
- // x = x | (x >> 32); // for 64-bit input
- // with shift amounts up to NewLen/2
- // return Len - popcount(x);
- //
- // Ref: "Hacker's Delight" by Henry Warren
- Register Op = SrcReg;
- unsigned NewLen = PowerOf2Ceil(Len);
- for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
- auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
- auto MIBOp = MIRBuilder.buildOr(
- SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
- Op = MIBOp.getReg(0);
- }
- auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
- MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
- MIBPop);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
- // This trivially expands to CTTZ.
- Observer.changingInstr(MI);
- MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_CTTZ: {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- unsigned Len = SrcTy.getSizeInBits();
- if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
- // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
- // zero.
- auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
- auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
- auto ICmp = MIRBuilder.buildICmp(
- CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
- auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
- MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
- MI.eraseFromParent();
- return Legalized;
- }
- // for now, we use: { return popcount(~x & (x - 1)); }
- // unless the target has ctlz but not ctpop, in which case we use:
- // { return Len - nlz(~x & (x - 1)); }
- // Ref: "Hacker's Delight" by Henry Warren
- auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
- auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
- auto MIBTmp = MIRBuilder.buildAnd(
- SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
- if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
- isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
- auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
- MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
- MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
- MI.eraseFromParent();
- return Legalized;
- }
- MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
- MI.getOperand(1).setReg(MIBTmp.getReg(0));
- return Legalized;
- }
- case TargetOpcode::G_CTPOP: {
- Register SrcReg = MI.getOperand(1).getReg();
- LLT Ty = MRI.getType(SrcReg);
- unsigned Size = Ty.getSizeInBits();
- MachineIRBuilder &B = MIRBuilder;
- // Count set bits in blocks of 2 bits. The default approach would be
- // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
- // We use the following formula instead:
- // B2Count = val - { (val >> 1) & 0x55555555 }
- // since it gives the same result in blocks of 2 with one instruction fewer.
- auto C_1 = B.buildConstant(Ty, 1);
- auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
- APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
- auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
- auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
- auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
- // To get the count in blocks of 4, add the values from adjacent blocks of 2.
- // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
- auto C_2 = B.buildConstant(Ty, 2);
- auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
- APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
- auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
- auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
- auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
- auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
- // For the count in blocks of 8 bits we don't have to mask the high 4 bits
- // before the addition, since each count sits in the range {0,...,8} and 4
- // bits are enough to hold such values. After the addition the high 4 bits
- // still hold the count of set bits in the high 4-bit block; set them to
- // zero to get the 8-bit result.
- // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
- auto C_4 = B.buildConstant(Ty, 4);
- auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
- auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
- APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
- auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
- auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
- assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
- // 8 bits can hold the CTPOP result of a 128-bit int or smaller. Multiplying
- // by this bitmask sets the 8 MSBs of ResTmp to the sum of all B8Counts in
- // the 8-bit blocks.
- auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
- auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
- // Shift count result from 8 high bits to low bits.
- auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
- B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
- MI.eraseFromParent();
- return Legalized;
- }
- }
- }
- // Check that (every element of) Reg is undef or not an exact multiple of BW.
- static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
- Register Reg, unsigned BW) {
- return matchUnaryPredicate(
- MRI, Reg,
- [=](const Constant *C) {
- // Null constant here means an undef.
- const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
- return !CI || CI->getValue().urem(BW) != 0;
- },
- /*AllowUndefs*/ true);
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
- Register Y = MI.getOperand(2).getReg();
- Register Z = MI.getOperand(3).getReg();
- LLT Ty = MRI.getType(Dst);
- LLT ShTy = MRI.getType(Z);
- unsigned BW = Ty.getScalarSizeInBits();
- if (!isPowerOf2_32(BW))
- return UnableToLegalize;
- const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
- unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
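- // fshl and fshr are mirror images: fshl(X, Y, Z) == fshr(X, Y, BW - Z)
- // whenever Z % BW != 0. When the amount may be a multiple of BW, pre-shift
- // X or Y by one so the always-in-range inverted amount ~Z can be used.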
- if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
- // fshl X, Y, Z -> fshr X, Y, -Z
- // fshr X, Y, Z -> fshl X, Y, -Z
- auto Zero = MIRBuilder.buildConstant(ShTy, 0);
- Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
- } else {
- // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
- // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
- auto One = MIRBuilder.buildConstant(ShTy, 1);
- if (IsFSHL) {
- Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
- X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
- } else {
- X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
- Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
- }
- Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
- }
- MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
- Register Y = MI.getOperand(2).getReg();
- Register Z = MI.getOperand(3).getReg();
- LLT Ty = MRI.getType(Dst);
- LLT ShTy = MRI.getType(Z);
- const unsigned BW = Ty.getScalarSizeInBits();
- const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
- Register ShX, ShY;
- Register ShAmt, InvShAmt;
- // FIXME: Emit optimized urem by constant instead of letting it expand later.
- if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
- // fshl: X << C | Y >> (BW - C)
- // fshr: X << (BW - C) | Y >> C
- // where C = Z % BW is not zero
- auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
- ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
- InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
- ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
- ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
- } else {
- // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
- // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
- auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
- if (isPowerOf2_32(BW)) {
- // Z % BW -> Z & (BW - 1)
- ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
- // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
- auto NotZ = MIRBuilder.buildNot(ShTy, Z);
- InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
- } else {
- auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
- ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
- InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
- }
- auto One = MIRBuilder.buildConstant(ShTy, 1);
- if (IsFSHL) {
- ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
- auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
- ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
- } else {
- auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
- ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
- ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
- }
- }
- MIRBuilder.buildOr(Dst, ShX, ShY);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
- // These operations approximately do the following (while avoiding undefined
- // shifts by BW):
- // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
- // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
- Register Dst = MI.getOperand(0).getReg();
- LLT Ty = MRI.getType(Dst);
- LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
- bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
- unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
- // TODO: Use smarter heuristic that accounts for vector legalization.
- if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
- return lowerFunnelShiftAsShifts(MI);
- // This only works for powers of 2; fall back to shifts if it fails.
- LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
- if (Result == UnableToLegalize)
- return lowerFunnelShiftAsShifts(MI);
- return Result;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Amt = MI.getOperand(2).getReg();
- LLT AmtTy = MRI.getType(Amt);
- auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
- bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
- unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
- auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
- MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Amt = MI.getOperand(2).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
- LLT AmtTy = MRI.getType(Amt);
- unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
- bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
- MIRBuilder.setInstrAndDebugLoc(MI);
- // If a rotate in the other direction is supported, use it.
- unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
- if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
- isPowerOf2_32(EltSizeInBits))
- return lowerRotateWithReverseRotate(MI);
- // If a funnel shift is supported, use it.
- unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
- unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
- bool IsFShLegal = false;
- if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
- LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
- auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
- Register R3) {
- MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
- MI.eraseFromParent();
- return Legalized;
- };
- // If a funnel shift in the other direction is supported, use it.
- if (IsFShLegal) {
- return buildFunnelShift(FShOpc, Dst, Src, Amt);
- } else if (isPowerOf2_32(EltSizeInBits)) {
- Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
- return buildFunnelShift(RevFsh, Dst, Src, Amt);
- }
- }
- auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
- unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
- unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
- auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
- Register ShVal;
- Register RevShiftVal;
- if (isPowerOf2_32(EltSizeInBits)) {
- // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
- // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
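- // For example, with w = 32 and c = 5 this computes
- // (x << (5 & 31)) | (x >> ((-5) & 31)) == (x << 5) | (x >> 27). The masks
- // also make c == 0 safe: both shift amounts become 0 and the OR yields x.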
- auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
- auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
- ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
- auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
- RevShiftVal =
- MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
- } else {
- // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
- // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
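- // e.g. for w = 24 and c = 5: (x << 5) | ((x >> 1) >> 18), where
- // 1 + 18 == 24 - 5. When c % w == 0 the second term degenerates to
- // (x >> 1) >> 23 == 0 instead of an undefined shift by 24.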
- auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
- auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
- ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
- auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
- auto One = MIRBuilder.buildConstant(AmtTy, 1);
- auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
- RevShiftVal =
- MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
- }
- MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
- MI.eraseFromParent();
- return Legalized;
- }
- // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
- // representation.
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- const LLT S64 = LLT::scalar(64);
- const LLT S32 = LLT::scalar(32);
- const LLT S1 = LLT::scalar(1);
- assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
- // unsigned cul2f(ulong u) {
- // uint lz = clz(u);
- // uint e = (u != 0) ? 127U + 63U - lz : 0;
- // u = (u << lz) & 0x7fffffffffffffffUL;
- // ulong t = u & 0xffffffffffUL;
- // uint v = (e << 23) | (uint)(u >> 40);
- // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
- // return as_float(v + r);
- // }
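- // The 40 bits shifted out of the normalized significand when forming v
- // are the rounding bits: r implements round-to-nearest-even by comparing
- // t against 0x8000000000 (the halfway point of those 40 bits) and
- // breaking exact ties with the low bit of v.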
- auto Zero32 = MIRBuilder.buildConstant(S32, 0);
- auto Zero64 = MIRBuilder.buildConstant(S64, 0);
- auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
- auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
- auto Sub = MIRBuilder.buildSub(S32, K, LZ);
- auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
- auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
- auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
- auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
- auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
- auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
- auto T = MIRBuilder.buildAnd(S64, U, Mask1);
- auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
- auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
- auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
- auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
- auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
- auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
- auto One = MIRBuilder.buildConstant(S32, 1);
- auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
- auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
- auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
- MIRBuilder.buildAdd(Dst, V, R);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
- if (SrcTy == LLT::scalar(1)) {
- auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
- auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
- MIRBuilder.buildSelect(Dst, Src, True, False);
- MI.eraseFromParent();
- return Legalized;
- }
- if (SrcTy != LLT::scalar(64))
- return UnableToLegalize;
- if (DstTy == LLT::scalar(32)) {
- // TODO: SelectionDAG has several alternative expansions to port which may
- // be more reasonable depending on the available instructions. If a target
- // has sitofp, does not have CTLZ, or can efficiently use f64 as an
- // intermediate type, this is probably worse.
- return lowerU64ToF32BitOps(MI);
- }
- return UnableToLegalize;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
- const LLT S64 = LLT::scalar(64);
- const LLT S32 = LLT::scalar(32);
- const LLT S1 = LLT::scalar(1);
- if (SrcTy == S1) {
- auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
- auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
- MIRBuilder.buildSelect(Dst, Src, True, False);
- MI.eraseFromParent();
- return Legalized;
- }
- if (SrcTy != S64)
- return UnableToLegalize;
- if (DstTy == S32) {
- // signed cl2f(long l) {
- // long s = l >> 63;
- // float r = cul2f((l + s) ^ s);
- // return s ? -r : r;
- // }
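- // For negative l, s == -1 and (l + s) ^ s == (l - 1) ^ -1 == ~(l - 1) == -l;
- // for non-negative l, s == 0 and the expression is l itself. The G_UITOFP
- // therefore converts |l|, and the final select restores the sign.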
- Register L = Src;
- auto SignBit = MIRBuilder.buildConstant(S64, 63);
- auto S = MIRBuilder.buildAShr(S64, L, SignBit);
- auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
- auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
- auto R = MIRBuilder.buildUITOFP(S32, Xor);
- auto RNeg = MIRBuilder.buildFNeg(S32, R);
- auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
- MIRBuilder.buildConstant(S64, 0));
- MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
- MI.eraseFromParent();
- return Legalized;
- }
- return UnableToLegalize;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
- const LLT S64 = LLT::scalar(64);
- const LLT S32 = LLT::scalar(32);
- if (SrcTy != S64 && SrcTy != S32)
- return UnableToLegalize;
- if (DstTy != S32 && DstTy != S64)
- return UnableToLegalize;
- // FPTOSI gives the same result as FPTOUI for positive signed integers.
- // FPTOUI needs to deal with fp values that convert to unsigned integers
- // greater than or equal to 2^31 for float or 2^63 for double. For brevity: 2^Exp.
- APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
- APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
- : APFloat::IEEEdouble(),
- APInt::getZero(SrcTy.getSizeInBits()));
- TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
- MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
- MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
- // For fp values greater than or equal to the Threshold (2^Exp), we use
- // FPTOSI on (Value - 2^Exp) and add 2^Exp back by setting the highest bit
- // in the result to 1.
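- // For example, converting the f32 value 2^31 + 2^30 to u32: the FCMP_ULT
- // fails, so the select takes FPTOSI((2^31 + 2^30) - 2^31) == 2^30 XOR'd
- // with the sign mask 0x80000000, giving 0xC0000000 == 2^31 + 2^30. A NaN
- // input compares unordered (ULT true) and takes the plain FPTOSI path.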
- MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
- MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
- MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
- MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
- const LLT S1 = LLT::scalar(1);
- MachineInstrBuilder FCMP =
- MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
- MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
- const LLT S64 = LLT::scalar(64);
- const LLT S32 = LLT::scalar(32);
- // FIXME: Only f32 to i64 conversions are supported.
- if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
- return UnableToLegalize;
- // Expand f32 -> i64 conversion
- // This algorithm comes from compiler-rt's implementation of fixsfdi:
- // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
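- // f32 layout used by the constants below: 1 sign bit, 8 exponent bits
- // (mask 0x7F800000, bias 127) and 23 mantissa bits (mask 0x007FFFFF);
- // OR'ing in 0x00800000 restores the implicit leading 1 of a normal value.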
- unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
- auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
- auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
- auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
- auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
- auto SignMask = MIRBuilder.buildConstant(SrcTy,
- APInt::getSignMask(SrcEltBits));
- auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
- auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
- auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
- Sign = MIRBuilder.buildSExt(DstTy, Sign);
- auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
- auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
- auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
- auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
- R = MIRBuilder.buildZExt(DstTy, R);
- auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
- auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
- auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
- auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
- auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
- auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
- const LLT S1 = LLT::scalar(1);
- auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
- S1, Exponent, ExponentLoBit);
- R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
- auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
- auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
- auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
- auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
- S1, Exponent, ZeroSrcTy);
- auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
- MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
- MI.eraseFromParent();
- return Legalized;
- }
- // f64 -> f16 conversion using round-to-nearest-even rounding mode.
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
- return UnableToLegalize;
- const unsigned ExpMask = 0x7ff;
- const unsigned ExpBiasf64 = 1023;
- const unsigned ExpBiasf16 = 15;
- const LLT S32 = LLT::scalar(32);
- const LLT S1 = LLT::scalar(1);
- auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
- Register U = Unmerge.getReg(0);
- Register UH = Unmerge.getReg(1);
- auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
- E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
- // Subtract the fp64 exponent bias (1023) to get the real exponent and
- // add the f16 bias (15) to get the biased exponent for the f16 format.
- E = MIRBuilder.buildAdd(
- S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
- auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
- M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
- auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
- MIRBuilder.buildConstant(S32, 0x1ff));
- MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
- auto Zero = MIRBuilder.buildConstant(S32, 0);
- auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
- auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
- M = MIRBuilder.buildOr(S32, M, Lo40Set);
- // (M != 0 ? 0x0200 : 0) | 0x7c00;
- auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
- auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
- auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
- auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
- auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
- // N = M | (E << 12);
- auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
- auto N = MIRBuilder.buildOr(S32, M, EShl12);
- // B = clamp(1-E, 0, 13);
- auto One = MIRBuilder.buildConstant(S32, 1);
- auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
- auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
- B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
- auto SigSetHigh = MIRBuilder.buildOr(S32, M,
- MIRBuilder.buildConstant(S32, 0x1000));
- auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
- auto D0 = MIRBuilder.buildShl(S32, D, B);
- auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
- D0, SigSetHigh);
- auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
- D = MIRBuilder.buildOr(S32, D, D1);
- auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
- auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
- auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
- V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
- auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
- MIRBuilder.buildConstant(S32, 3));
- auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
- auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
- MIRBuilder.buildConstant(S32, 5));
- auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
- V1 = MIRBuilder.buildOr(S32, V0, V1);
- V = MIRBuilder.buildAdd(S32, V, V1);
- auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
- E, MIRBuilder.buildConstant(S32, 30));
- V = MIRBuilder.buildSelect(S32, CmpEGt30,
- MIRBuilder.buildConstant(S32, 0x7c00), V);
- auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
- E, MIRBuilder.buildConstant(S32, 1039));
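- // E == 1039 corresponds to a raw f64 exponent field of 0x7ff
- // (2047 - 1023 + 15 == 1039), i.e. the source was Inf or NaN; in that
- // case I (0x7c00, plus the 0x0200 quiet bit when the significand is
- // non-zero) is the correct f16 payload.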
- V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
- // Extract the sign bit.
- auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
- Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
- // Insert the sign bit
- V = MIRBuilder.buildOr(S32, Sign, V);
- MIRBuilder.buildTrunc(Dst, V);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
- const LLT S64 = LLT::scalar(64);
- const LLT S16 = LLT::scalar(16);
- if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
- return lowerFPTRUNC_F64_TO_F16(MI);
- return UnableToLegalize;
- }
- // TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
- // multiplication tree.
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
- LLT Ty = MRI.getType(Dst);
- auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
- MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
- MI.eraseFromParent();
- return Legalized;
- }
- static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::G_SMIN:
- return CmpInst::ICMP_SLT;
- case TargetOpcode::G_SMAX:
- return CmpInst::ICMP_SGT;
- case TargetOpcode::G_UMIN:
- return CmpInst::ICMP_ULT;
- case TargetOpcode::G_UMAX:
- return CmpInst::ICMP_UGT;
- default:
- llvm_unreachable("not in integer min/max");
- }
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
- const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
- LLT CmpType = MRI.getType(Dst).changeElementSize(1);
- auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
- MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
- const LLT Src0Ty = MRI.getType(Src0);
- const LLT Src1Ty = MRI.getType(Src1);
- const int Src0Size = Src0Ty.getScalarSizeInBits();
- const int Src1Size = Src1Ty.getScalarSizeInBits();
- auto SignBitMask = MIRBuilder.buildConstant(
- Src0Ty, APInt::getSignMask(Src0Size));
- auto NotSignBitMask = MIRBuilder.buildConstant(
- Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
- Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
- Register And1;
- if (Src0Ty == Src1Ty) {
- And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
- } else if (Src0Size > Src1Size) {
- auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
- auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
- auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
- And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
- } else {
- auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
- auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
- auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
- And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
- }
- // Be careful about setting nsz/nnan/ninf on every instruction, since the
- // constants are a nan and -0.0, but the final result should preserve
- // everything.
- unsigned Flags = MI.getFlags();
- MIRBuilder.buildOr(Dst, And0, And1, Flags);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
- unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
- TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
- LLT Ty = MRI.getType(Dst);
- if (!MI.getFlag(MachineInstr::FmNoNans)) {
- // Insert canonicalizes if it's possible we need to quiet to get correct
- // sNaN behavior.
- // Note this must be done here, and not as an optimization combine in the
- // absence of a dedicated quiet-snan instruction, as we're using an
- // omni-purpose G_FCANONICALIZE.
- if (!isKnownNeverSNaN(Src0, MRI))
- Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
- if (!isKnownNeverSNaN(Src1, MRI))
- Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
- }
- // If there are no nans, it's safe to simply replace this with the non-IEEE
- // version.
- MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
- // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
- Register DstReg = MI.getOperand(0).getReg();
- LLT Ty = MRI.getType(DstReg);
- unsigned Flags = MI.getFlags();
- auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
- Flags);
- MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
- const unsigned Flags = MI.getFlags();
- const LLT Ty = MRI.getType(DstReg);
- const LLT CondTy = Ty.changeElementSize(1);
- // round(x) =>
- // t = trunc(x);
- // d = fabs(x - t);
- // o = copysign(1.0f, x);
- // return t + (d >= 0.5 ? o : 0.0);
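- // For example, round(2.5): t = 2.0, d = 0.5, o = 1.0, result 3.0;
- // round(-2.5): t = -2.0, d = 0.5, o = -1.0, result -3.0. Halfway cases
- // round away from zero, matching the llvm.round semantics.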
- auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
- auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
- auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
- auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
- auto One = MIRBuilder.buildFConstant(Ty, 1.0);
- auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
- auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
- auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
- Flags);
- auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
- MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerFFloor(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- unsigned Flags = MI.getFlags();
- LLT Ty = MRI.getType(DstReg);
- const LLT CondTy = Ty.changeElementSize(1);
- // result = trunc(src);
- // if (src < 0.0 && src != result)
- // result += -1.0.
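- // For example, floor(-1.5): trunc gives -1.0, the source is both negative
- // and != trunc, so the i1 true sign-extends through G_SITOFP to -1.0 and
- // the result is -2.0. For floor(1.5) the condition is false, 0.0 is added,
- // and the trunc result 1.0 stands.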
- auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
- auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
- auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
- SrcReg, Zero, Flags);
- auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
- SrcReg, Trunc, Flags);
- auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
- auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
- MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
- const unsigned NumOps = MI.getNumOperands();
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(Src0Reg);
- unsigned PartSize = SrcTy.getSizeInBits();
- LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
- Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
- for (unsigned I = 2; I != NumOps; ++I) {
- const unsigned Offset = (I - 1) * PartSize;
- Register SrcReg = MI.getOperand(I).getReg();
- auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
- Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
- MRI.createGenericVirtualRegister(WideTy);
- auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
- auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
- MIRBuilder.buildOr(NextResult, ResultReg, Shl);
- ResultReg = NextResult;
- }
- if (DstTy.isPointer()) {
- if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
- DstTy.getAddressSpace())) {
- LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
- return UnableToLegalize;
- }
- MIRBuilder.buildIntToPtr(DstReg, ResultReg);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
- const unsigned NumDst = MI.getNumOperands() - 1;
- Register SrcReg = MI.getOperand(NumDst).getReg();
- Register Dst0Reg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(Dst0Reg);
- if (DstTy.isPointer())
- return UnableToLegalize; // TODO
- SrcReg = coerceToScalar(SrcReg);
- if (!SrcReg)
- return UnableToLegalize;
- // Expand scalarizing unmerge as bitcast to integer and shift.
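- // For example, unmerging an s64 into two s32 pieces becomes
- // Dst0 = trunc(Src) and Dst1 = trunc(Src >> 32), in little-endian piece
- // order.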
- LLT IntTy = MRI.getType(SrcReg);
- MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
- const unsigned DstSize = DstTy.getSizeInBits();
- unsigned Offset = DstSize;
- for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
- auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
- auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
- MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- /// Lower a vector extract or insert by writing the vector to a stack temporary
- /// and reloading the element or vector.
- ///
- /// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
- /// =>
- /// %stack_temp = G_FRAME_INDEX
- /// G_STORE %vec, %stack_temp
- /// %idx = clamp(%idx, %vec.getNumElements())
- /// %element_ptr = G_PTR_ADD %stack_temp, %idx
- /// %dst = G_LOAD %element_ptr
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register InsertVal;
- if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
- InsertVal = MI.getOperand(2).getReg();
- Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
- LLT VecTy = MRI.getType(SrcVec);
- LLT EltTy = VecTy.getElementType();
- unsigned NumElts = VecTy.getNumElements();
- int64_t IdxVal;
- if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal < NumElts) {
- SmallVector<Register, 8> SrcRegs;
- extractParts(SrcVec, EltTy, NumElts, SrcRegs);
- if (InsertVal) {
- SrcRegs[IdxVal] = MI.getOperand(2).getReg();
- MIRBuilder.buildMerge(DstReg, SrcRegs);
- } else {
- MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- if (!EltTy.isByteSized()) { // Not implemented.
- LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
- return UnableToLegalize;
- }
- unsigned EltBytes = EltTy.getSizeInBytes();
- Align VecAlign = getStackTemporaryAlignment(VecTy);
- Align EltAlign;
- MachinePointerInfo PtrInfo;
- auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
- VecAlign, PtrInfo);
- MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
- // Get the pointer to the element, and be sure not to hit undefined behavior
- // if the index is out of bounds.
- Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
- if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
- int64_t Offset = IdxVal * EltBytes;
- PtrInfo = PtrInfo.getWithOffset(Offset);
- EltAlign = commonAlignment(VecAlign, Offset);
- } else {
- // We lose information with a variable offset.
- EltAlign = getStackTemporaryAlignment(EltTy);
- PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
- }
- if (InsertVal) {
- // Write the inserted element
- MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
- // Reload the whole vector.
- MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
- } else {
- MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(1).getReg();
- Register Src1Reg = MI.getOperand(2).getReg();
- LLT Src0Ty = MRI.getType(Src0Reg);
- LLT DstTy = MRI.getType(DstReg);
- LLT IdxTy = LLT::scalar(32);
- ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- if (DstTy.isScalar()) {
- if (Src0Ty.isVector())
- return UnableToLegalize;
- // This is just a SELECT.
- assert(Mask.size() == 1 && "Expected a single mask element");
- Register Val;
- if (Mask[0] < 0 || Mask[0] > 1)
- Val = MIRBuilder.buildUndef(DstTy).getReg(0);
- else
- Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
- MIRBuilder.buildCopy(DstReg, Val);
- MI.eraseFromParent();
- return Legalized;
- }
- Register Undef;
- SmallVector<Register, 32> BuildVec;
- LLT EltTy = DstTy.getElementType();
- for (int Idx : Mask) {
- if (Idx < 0) {
- if (!Undef.isValid())
- Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
- BuildVec.push_back(Undef);
- continue;
- }
- if (Src0Ty.isScalar()) {
- BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
- } else {
- int NumElts = Src0Ty.getNumElements();
- Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
- int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
- auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
- auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
- BuildVec.push_back(Extract.getReg(0));
- }
- }
- MIRBuilder.buildBuildVector(DstReg, BuildVec);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
- const auto &MF = *MI.getMF();
- const auto &TFI = *MF.getSubtarget().getFrameLowering();
- if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
- return UnableToLegalize;
- Register Dst = MI.getOperand(0).getReg();
- Register AllocSize = MI.getOperand(1).getReg();
- Align Alignment = assumeAligned(MI.getOperand(2).getImm());
- LLT PtrTy = MRI.getType(Dst);
- LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
- Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
- auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
- SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
- // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
- // have to generate an extra instruction to negate the alloc and then use
- // G_PTR_ADD to add the negative offset.
- auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
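- // For example, for a 16-byte alignment request AlignMask below becomes
- // -16 (...fff0), and AND'ing the decremented SP with it rounds the
- // allocation down to a 16-byte boundary, the right direction for a
- // downward-growing stack.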
- if (Alignment > Align(1)) {
- APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
- AlignMask.negate();
- auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
- Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
- }
- SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
- MIRBuilder.buildCopy(SPReg, SPTmp);
- MIRBuilder.buildCopy(Dst, SPTmp);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerExtract(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- unsigned Offset = MI.getOperand(2).getImm();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
- // Extract sub-vector or one element
- if (SrcTy.isVector()) {
- unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
- unsigned DstSize = DstTy.getSizeInBits();
- if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
- (Offset + DstSize <= SrcTy.getSizeInBits())) {
- // Unmerge and allow access to each Src element for the artifact combiner.
- auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src);
- // Take the element(s) we need to extract and copy them (merging if needed).
- SmallVector<Register, 8> SubVectorElts;
- for (unsigned Idx = Offset / SrcEltSize;
- Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
- SubVectorElts.push_back(Unmerge.getReg(Idx));
- }
- if (SubVectorElts.size() == 1)
- MIRBuilder.buildCopy(Dst, SubVectorElts[0]);
- else
- MIRBuilder.buildMerge(Dst, SubVectorElts);
- MI.eraseFromParent();
- return Legalized;
- }
- }
- if (DstTy.isScalar() &&
- (SrcTy.isScalar() ||
- (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
- LLT SrcIntTy = SrcTy;
- if (!SrcTy.isScalar()) {
- SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
- Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
- }
- if (Offset == 0)
- MIRBuilder.buildTrunc(Dst, Src);
- else {
- auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
- auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
- MIRBuilder.buildTrunc(Dst, Shr);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- return UnableToLegalize;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register InsertSrc = MI.getOperand(2).getReg();
- uint64_t Offset = MI.getOperand(3).getImm();
- LLT DstTy = MRI.getType(Src);
- LLT InsertTy = MRI.getType(InsertSrc);
- // Insert sub-vector or one element
- if (DstTy.isVector() && !InsertTy.isPointer()) {
- LLT EltTy = DstTy.getElementType();
- unsigned EltSize = EltTy.getSizeInBits();
- unsigned InsertSize = InsertTy.getSizeInBits();
- if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
- (Offset + InsertSize <= DstTy.getSizeInBits())) {
- auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
- SmallVector<Register, 8> DstElts;
- unsigned Idx = 0;
- // Copy the elements from Src that precede the insert start Offset.
- for (; Idx < Offset / EltSize; ++Idx) {
- DstElts.push_back(UnmergeSrc.getReg(Idx));
- }
- // Replace elements in Src with elements from InsertSrc
- if (InsertTy.getSizeInBits() > EltSize) {
- auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
- for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
- ++Idx, ++i) {
- DstElts.push_back(UnmergeInsertSrc.getReg(i));
- }
- } else {
- DstElts.push_back(InsertSrc);
- ++Idx;
- }
- // Remaining elements from Src after insert
- for (; Idx < DstTy.getNumElements(); ++Idx) {
- DstElts.push_back(UnmergeSrc.getReg(Idx));
- }
- MIRBuilder.buildMerge(Dst, DstElts);
- MI.eraseFromParent();
- return Legalized;
- }
- }
- if (InsertTy.isVector() ||
- (DstTy.isVector() && DstTy.getElementType() != InsertTy))
- return UnableToLegalize;
- const DataLayout &DL = MIRBuilder.getDataLayout();
- if ((DstTy.isPointer() &&
- DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
- (InsertTy.isPointer() &&
- DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
- LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
- return UnableToLegalize;
- }
- LLT IntDstTy = DstTy;
- if (!DstTy.isScalar()) {
- IntDstTy = LLT::scalar(DstTy.getSizeInBits());
- Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
- }
- if (!InsertTy.isScalar()) {
- const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
- InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
- }
- Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
- if (Offset != 0) {
- auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
- ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
- }
- APInt MaskVal = APInt::getBitsSetWithWrap(
- DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
- auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
- auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
- auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
- MIRBuilder.buildCast(Dst, Or);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
- Register Dst0 = MI.getOperand(0).getReg();
- Register Dst1 = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
- LLT Ty = MRI.getType(Dst0);
- LLT BoolTy = MRI.getType(Dst1);
- if (IsAdd)
- MIRBuilder.buildAdd(Dst0, LHS, RHS);
- else
- MIRBuilder.buildSub(Dst0, LHS, RHS);
- // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
- auto Zero = MIRBuilder.buildConstant(Ty, 0);
- // For an addition, the result should be less than one of the operands (LHS)
- // if and only if the other operand (RHS) is negative, otherwise there will
- // be overflow.
- // For a subtraction, the result should be less than one of the operands
- // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
- // otherwise there will be overflow.
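- // 8-bit example for the add case: 100 + 50 wraps to -106, so the result
- // is less than LHS (true) while RHS is negative (false); the XOR of the
- // two compares therefore reports overflow.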
- auto ResultLowerThanLHS =
- MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
- auto ConditionRHS = MIRBuilder.buildICmp(
- IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
- MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- LLT Ty = MRI.getType(Res);
- bool IsSigned;
- bool IsAdd;
- unsigned BaseOp;
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("unexpected addsat/subsat opcode");
- case TargetOpcode::G_UADDSAT:
- IsSigned = false;
- IsAdd = true;
- BaseOp = TargetOpcode::G_ADD;
- break;
- case TargetOpcode::G_SADDSAT:
- IsSigned = true;
- IsAdd = true;
- BaseOp = TargetOpcode::G_ADD;
- break;
- case TargetOpcode::G_USUBSAT:
- IsSigned = false;
- IsAdd = false;
- BaseOp = TargetOpcode::G_SUB;
- break;
- case TargetOpcode::G_SSUBSAT:
- IsSigned = true;
- IsAdd = false;
- BaseOp = TargetOpcode::G_SUB;
- break;
- }
- if (IsSigned) {
- // sadd.sat(a, b) ->
- // hi = 0x7fffffff - smax(a, 0)
- // lo = 0x80000000 - smin(a, 0)
- // a + smin(smax(lo, b), hi)
- // ssub.sat(a, b) ->
- // lo = smax(a, -1) - 0x7fffffff
- // hi = smin(a, -1) - 0x80000000
- // a - smin(smax(lo, b), hi)
- // TODO: AMDGPU can use a "median of 3" instruction here:
- // a +/- med3(lo, b, hi)
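- // Worked 8-bit example: sadd.sat(100, 50) computes hi = 127 - smax(100, 0)
- // == 27 and lo = -128 - smin(100, 0) == -128, clamps b to
- // smin(smax(-128, 50), 27) == 27, and returns 100 + 27 == 127, the signed
- // maximum.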
- uint64_t NumBits = Ty.getScalarSizeInBits();
- auto MaxVal =
- MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
- auto MinVal =
- MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
- MachineInstrBuilder Hi, Lo;
- if (IsAdd) {
- auto Zero = MIRBuilder.buildConstant(Ty, 0);
- Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
- Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
- } else {
- auto NegOne = MIRBuilder.buildConstant(Ty, -1);
- Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
- MaxVal);
- Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
- MinVal);
- }
- auto RHSClamped =
- MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
- MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
- } else {
- // uadd.sat(a, b) -> a + umin(~a, b)
- // usub.sat(a, b) -> a - umin(a, b)
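- // Worked 8-bit examples: uadd.sat(200, 100) computes umin(~200, 100) ==
- // umin(55, 100) == 55 and returns 200 + 55 == 255 (UINT8_MAX), while
- // usub.sat(50, 100) computes 50 - umin(50, 100) == 0.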
- Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
- auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
- MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- LLT Ty = MRI.getType(Res);
- LLT BoolTy = Ty.changeElementSize(1);
- bool IsSigned;
- bool IsAdd;
- unsigned OverflowOp;
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("unexpected addsat/subsat opcode");
- case TargetOpcode::G_UADDSAT:
- IsSigned = false;
- IsAdd = true;
- OverflowOp = TargetOpcode::G_UADDO;
- break;
- case TargetOpcode::G_SADDSAT:
- IsSigned = true;
- IsAdd = true;
- OverflowOp = TargetOpcode::G_SADDO;
- break;
- case TargetOpcode::G_USUBSAT:
- IsSigned = false;
- IsAdd = false;
- OverflowOp = TargetOpcode::G_USUBO;
- break;
- case TargetOpcode::G_SSUBSAT:
- IsSigned = true;
- IsAdd = false;
- OverflowOp = TargetOpcode::G_SSUBO;
- break;
- }
- auto OverflowRes =
- MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
- Register Tmp = OverflowRes.getReg(0);
- Register Ov = OverflowRes.getReg(1);
- MachineInstrBuilder Clamp;
- if (IsSigned) {
- // sadd.sat(a, b) ->
- // {tmp, ov} = saddo(a, b)
- // ov ? (tmp >>s 31) + 0x80000000 : tmp
- // ssub.sat(a, b) ->
- // {tmp, ov} = ssubo(a, b)
- // ov ? (tmp >>s 31) + 0x80000000 : tmp
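- // The clamp works because a signed overflow flips the result's sign bit:
- // tmp >>s 31 is all-ones when tmp is negative, and adding 0x80000000 then
- // yields 0x7fffffff (the maximum); when tmp is non-negative it yields
- // 0x80000000 (the minimum).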
- uint64_t NumBits = Ty.getScalarSizeInBits();
- auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
- auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
- auto MinVal =
- MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
- Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
- } else {
- // uadd.sat(a, b) ->
- // {tmp, ov} = uaddo(a, b)
- // ov ? 0xffffffff : tmp
- // usub.sat(a, b) ->
- // {tmp, ov} = usubo(a, b)
- // ov ? 0 : tmp
- Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
- }
- MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerShlSat(MachineInstr &MI) {
- assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
- MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
- "Expected shlsat opcode!");
- bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- LLT Ty = MRI.getType(Res);
- LLT BoolTy = Ty.changeElementSize(1);
- unsigned BW = Ty.getScalarSizeInBits();
- auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
- auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
- : MIRBuilder.buildLShr(Ty, Result, RHS);
- MachineInstrBuilder SatVal;
- if (IsSigned) {
- auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
- auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
- auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
- MIRBuilder.buildConstant(Ty, 0));
- SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
- } else {
- SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
- }
- auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
- MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerBswap(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- const LLT Ty = MRI.getType(Src);
- unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
- unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
- // Swap most and least significant byte, set remaining bytes in Res to zero.
- auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
- auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
- auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
- auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
- // Set i-th high/low byte in Res to i-th low/high byte from Src.
- for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
- // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
- APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
- auto Mask = MIRBuilder.buildConstant(Ty, APMask);
- auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
- // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
- auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
- auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
- Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
- // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
- auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
- auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
- Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
- }
- Res.getInstr()->getOperand(0).setReg(Dst);
- MI.eraseFromParent();
- return Legalized;
- }
- // { (Src & Mask) >> N } | { (Src << N) & Mask }
- static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
- MachineInstrBuilder Src, APInt Mask) {
- const LLT Ty = Dst.getLLTTy(*B.getMRI());
- MachineInstrBuilder C_N = B.buildConstant(Ty, N);
- MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
- auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
- auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
- return B.buildOr(Dst, LHS, RHS);
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- const LLT Ty = MRI.getType(Src);
- unsigned Size = Ty.getSizeInBits();
- MachineInstrBuilder BSWAP =
- MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
- // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
- // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
- // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
- MachineInstrBuilder Swap4 =
- SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
- // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
- // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
- // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
- MachineInstrBuilder Swap2 =
- SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
- // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
- // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
- // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
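- // End-to-end 8-bit example: bitreverse(0x01). G_BSWAP is the identity on a
- // single byte, Swap4 gives 0x10, Swap2 gives 0x40, and this final swap
- // yields 0x80, the bits of 0x01 reversed.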
- SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
- MachineFunction &MF = MIRBuilder.getMF();
- bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
- int NameOpIdx = IsRead ? 1 : 0;
- int ValRegIndex = IsRead ? 0 : 1;
- Register ValReg = MI.getOperand(ValRegIndex).getReg();
- const LLT Ty = MRI.getType(ValReg);
- const MDString *RegStr = cast<MDString>(
- cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
- Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
- if (!PhysReg.isValid())
- return UnableToLegalize;
- if (IsRead)
- MIRBuilder.buildCopy(ValReg, PhysReg);
- else
- MIRBuilder.buildCopy(PhysReg, ValReg);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
- bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
- unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
- Register Result = MI.getOperand(0).getReg();
- LLT OrigTy = MRI.getType(Result);
- auto SizeInBits = OrigTy.getScalarSizeInBits();
- LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
- auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
- auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
- auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
- unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
- auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
- auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
- MIRBuilder.buildTrunc(Result, Shifted);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
- // Implement vector G_SELECT in terms of XOR, AND, OR.
- Register DstReg = MI.getOperand(0).getReg();
- Register MaskReg = MI.getOperand(1).getReg();
- Register Op1Reg = MI.getOperand(2).getReg();
- Register Op2Reg = MI.getOperand(3).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT MaskTy = MRI.getType(MaskReg);
- LLT Op1Ty = MRI.getType(Op1Reg);
- if (!DstTy.isVector())
- return UnableToLegalize;
- // Vector selects can have a scalar predicate. If so, splat it into a vector
- // and finish; a later legalization attempt will retry with the splatted mask.
- if (MaskTy.isScalar()) {
- Register MaskElt = MaskReg;
- if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
- MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
- // Generate a vector splat idiom to be pattern matched later.
- auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(ShufSplat.getReg(0));
- Observer.changedInstr(MI);
- return Legalized;
- }
- if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
- return UnableToLegalize;
- }
- auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
- auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
- auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
- MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
- // Split DIVREM into individual instructions.
- unsigned Opcode = MI.getOpcode();
- MIRBuilder.buildInstr(
- Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
- : TargetOpcode::G_UDIV,
- {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
- MIRBuilder.buildInstr(
- Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
- : TargetOpcode::G_UREM,
- {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
- // Expand %res = G_ABS %a into:
- // %v1 = G_ASHR %a, scalar_size-1
- // %v2 = G_ADD %a, %v1
- // %res = G_XOR %v2, %v1
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- Register OpReg = MI.getOperand(1).getReg();
- auto ShiftAmt =
- MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
- auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
- auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
- MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
- // Expand %res = G_ABS %a into:
- // %v1 = G_CONSTANT 0
- // %v2 = G_SUB %v1, %a
- // %res = G_SMAX %a, %v2
- Register SrcReg = MI.getOperand(1).getReg();
- LLT Ty = MRI.getType(SrcReg);
- auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
- auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
- MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- // The source could be a scalar if the IR type was <1 x sN>.
- if (SrcTy.isScalar()) {
- if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
- return UnableToLegalize; // FIXME: handle extension.
- // This can be just a plain copy.
- Observer.changingInstr(MI);
- MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
- Observer.changedInstr(MI);
- return Legalized;
- }
- return UnableToLegalize;
- }
- static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
- // On Darwin, -Os means optimize for size without hurting performance, so
- // only really optimize for size when -Oz (MinSize) is used.
- if (MF.getTarget().getTargetTriple().isOSDarwin())
- return MF.getFunction().hasMinSize();
- return MF.getFunction().hasOptSize();
- }
- // Returns a list of types to use for memory op lowering in MemOps. A partial
- // port of findOptimalMemOpLowering in TargetLowering.
- static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
- unsigned Limit, const MemOp &Op,
- unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes,
- const TargetLowering &TLI) {
- if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
- return false;
- LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
- if (Ty == LLT()) {
- // Use the largest scalar type whose alignment constraints are satisfied.
- // We only need to check DstAlign here as SrcAlign is always greater than
- // or equal to DstAlign (or zero).
- Ty = LLT::scalar(64);
- if (Op.isFixedDstAlign())
- while (Op.getDstAlign() < Ty.getSizeInBytes() &&
- !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
- Ty = LLT::scalar(Ty.getSizeInBytes());
- assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
- // FIXME: check for the largest legal type we can load/store to.
- }
- unsigned NumMemOps = 0;
- uint64_t Size = Op.size();
- while (Size) {
- unsigned TySize = Ty.getSizeInBytes();
- while (TySize > Size) {
- // For now, only use non-vector loads / stores for the left-over pieces.
- LLT NewTy = Ty;
- // FIXME: check for mem op safety and legality of the types. Not all of
- // SDAGisms map cleanly to GISel concepts.
- if (NewTy.isVector())
- NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
- NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
- unsigned NewTySize = NewTy.getSizeInBytes();
- assert(NewTySize > 0 && "Could not find appropriate type");
- // If the new LLT cannot cover all of the remaining bits, then consider
- // issuing one (or a pair of) unaligned and overlapping loads / stores.
- bool Fast;
- // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
- MVT VT = getMVTForLLT(Ty);
- if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
- TLI.allowsMisalignedMemoryAccesses(
- VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
- MachineMemOperand::MONone, &Fast) &&
- Fast)
- TySize = Size;
- else {
- Ty = NewTy;
- TySize = NewTySize;
- }
- }
- if (++NumMemOps > Limit)
- return false;
- MemOps.push_back(Ty);
- Size -= TySize;
- }
- return true;
- }
- static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
- if (Ty.isVector())
- return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
- Ty.getNumElements());
- return IntegerType::get(C, Ty.getSizeInBits());
- }
- // Get a vectorized representation of the memset value operand, GISel edition.
- static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
- MachineRegisterInfo &MRI = *MIB.getMRI();
- unsigned NumBits = Ty.getScalarSizeInBits();
- auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
- if (!Ty.isVector() && ValVRegAndVal) {
- APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
- APInt SplatVal = APInt::getSplat(NumBits, Scalar);
- return MIB.buildConstant(Ty, SplatVal).getReg(0);
- }
- // Extend the byte value to the larger type, and then multiply by a magic
- // value 0x010101... in order to replicate it across every byte.
- // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
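- // For example, a memset value of 0xAB widened to s32 becomes
- // 0xAB * 0x01010101 == 0xABABABAB, replicating the byte across the wider
- // scalar.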
- if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
- return MIB.buildConstant(Ty, 0).getReg(0);
- }
- LLT ExtType = Ty.getScalarType();
- auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
- if (NumBits > 8) {
- APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
- auto MagicMI = MIB.buildConstant(ExtType, Magic);
- Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
- }
- // For vector types create a G_BUILD_VECTOR.
- if (Ty.isVector())
- Val = MIB.buildSplatVector(Ty, Val).getReg(0);
- return Val;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
- uint64_t KnownLen, Align Alignment,
- bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
- assert(KnownLen != 0 && "Have a zero length memset length!");
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
- unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
- std::vector<LLT> MemOps;
- const auto &DstMMO = **MI.memoperands_begin();
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
- auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
- bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
- if (!findGISelOptimalMemOpLowering(MemOps, Limit,
- MemOp::Set(KnownLen, DstAlignCanChange,
- Alignment,
- /*IsZeroMemset=*/IsZeroVal,
- /*IsVolatile=*/IsVolatile),
- DstPtrInfo.getAddrSpace(), ~0u,
- MF.getFunction().getAttributes(), TLI))
- return UnableToLegalize;
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
- MachineIRBuilder MIB(MI);
- // Find the largest store and generate the bit pattern for it.
- LLT LargestTy = MemOps[0];
- for (unsigned i = 1; i < MemOps.size(); i++)
- if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
- LargestTy = MemOps[i];
- // The memset stored value is always defined as an s8, so in order to make it
- // work with larger store types we need to repeat the bit pattern across the
- // wider type.
- Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
- if (!MemSetValue)
- return UnableToLegalize;
- // Generate the stores. For each store type in the list, we generate the
- // matching store of that type to the destination address.
- LLT PtrTy = MRI.getType(Dst);
- unsigned DstOff = 0;
- unsigned Size = KnownLen;
- for (unsigned I = 0; I < MemOps.size(); I++) {
- LLT Ty = MemOps[I];
- unsigned TySize = Ty.getSizeInBytes();
- if (TySize > Size) {
- // Issuing an unaligned store that overlaps with the previous store. Adjust
- // the offset accordingly.
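- // E.g. a 7 byte memset done with two s32 stores moves the second store back
- // to offset 3 so it covers bytes 3-6, overlapping the first store by one
- // byte.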
- assert(I == MemOps.size() - 1 && I != 0);
- DstOff -= TySize - Size;
- }
- // If this store is smaller than the largest store, see whether we can get
- // the smaller value for free with a truncate.
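- // E.g. if MemSetValue is an s64 splat 0xABAB...ABAB and an s32 store is
- // needed, a free truncate yields 0xABABABAB without materializing a second
- // constant.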
- Register Value = MemSetValue;
- if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
- MVT VT = getMVTForLLT(Ty);
- MVT LargestVT = getMVTForLLT(LargestTy);
- if (!LargestTy.isVector() && !Ty.isVector() &&
- TLI.isTruncateFree(LargestVT, VT))
- Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
- else
- Value = getMemsetValue(Val, Ty, MIB);
- if (!Value)
- return UnableToLegalize;
- }
- auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
- Register Ptr = Dst;
- if (DstOff != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
- Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- }
- MIB.buildStore(Value, Ptr, *StoreMMO);
- DstOff += Ty.getSizeInBytes();
- Size -= TySize;
- }
- MI.eraseFromParent();
- return Legalized;
- }
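- // A sketch of the result for memset(%dst, 0xAB, 8) on a target whose
- // preferred memop type here is s64 (the exact types are target dependent):
- //   %val:_(s64) = G_CONSTANT i64 0xABABABABABABABAB
- //   G_STORE %val(s64), %dst(p0) :: (store (s64))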
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
- const auto *MMOIt = MI.memoperands_begin();
- const MachineMemOperand *MemOp = *MMOIt;
- bool IsVolatile = MemOp->isVolatile();
- // See if this is a constant length copy
- auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
- // FIXME: support dynamically sized G_MEMCPY_INLINE
- assert(LenVRegAndVal.hasValue() &&
- "inline memcpy with dynamic size is not yet supported");
- uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
- if (KnownLen == 0) {
- MI.eraseFromParent();
- return Legalized;
- }
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- Align DstAlign = DstMMO.getBaseAlign();
- Align SrcAlign = SrcMMO.getBaseAlign();
- return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
- IsVolatile);
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
- uint64_t KnownLen, Align DstAlign,
- Align SrcAlign, bool IsVolatile) {
- assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
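- // An inline copy must never fall back to a libcall, so pass an effectively
- // unlimited store budget instead of the target's memcpy store limit.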
- return lowerMemcpy(MI, Dst, Src, KnownLen,
- std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
- IsVolatile);
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
- uint64_t KnownLen, uint64_t Limit, Align DstAlign,
- Align SrcAlign, bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
- assert(KnownLen != 0 && "Have a zero length memcpy!");
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
- // FIXME: infer better src pointer alignment like SelectionDAG does here.
- // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
- // if the memcpy is in a tail call position.
- std::vector<LLT> MemOps;
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
- MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit,
- MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
- IsVolatile),
- DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes(), TLI))
- return UnableToLegalize;
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
- // Don't promote to an alignment that would require dynamic stack
- // realignment.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->hasStackRealignment(MF))
- while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
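- // E.g. with a natural stack alignment of 16, a candidate NewAlign of 32 is
- // halved to 16 before being applied to the frame object.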
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
- LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
- MachineIRBuilder MIB(MI);
- // Now we need to emit a load/store pair for each of the types we've
- // collected. I.e. for each type, generate a load from the source pointer of
- // that type width, then generate a corresponding store of the loaded value
- // to the dest buffer. This can result in a sequence of loads and stores of
- // mixed types, depending on what the target specifies as good types to use.
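- // E.g. a 12 byte copy with legal s64 and s32 memops may lower to an s64
- // load/store pair followed by an s32 load/store pair at offset 8.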
- unsigned CurrOffset = 0;
- unsigned Size = KnownLen;
- for (auto CopyTy : MemOps) {
- // Issuing an unaligned load / store pair that overlaps with the previous
- // pair. Adjust the offset accordingly.
- if (CopyTy.getSizeInBytes() > Size)
- CurrOffset -= CopyTy.getSizeInBytes() - Size;
- // Construct MMOs for the accesses.
- auto *LoadMMO =
- MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
- // Create the load.
- Register LoadPtr = Src;
- Register Offset;
- if (CurrOffset != 0) {
- LLT SrcTy = MRI.getType(Src);
- Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
- .getReg(0);
- LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
- }
- auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
- // Create the store.
- Register StorePtr = Dst;
- if (CurrOffset != 0) {
- LLT DstTy = MRI.getType(Dst);
- StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
- }
- MIB.buildStore(LdVal, StorePtr, *StoreMMO);
- CurrOffset += CopyTy.getSizeInBytes();
- Size -= CopyTy.getSizeInBytes();
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
- uint64_t KnownLen, Align DstAlign, Align SrcAlign,
- bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
- assert(KnownLen != 0 && "Have a zero length memmove!");
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
- unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
- std::vector<LLT> MemOps;
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
- MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
- // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
- // to a bug in its findOptimalMemOpLowering implementation. For now do the
- // same thing here.
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit,
- MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
- /*IsVolatile*/ true),
- DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes(), TLI))
- return UnableToLegalize;
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
- // Don't promote to an alignment that would require dynamic stack
- // realignment.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->hasStackRealignment(MF))
- while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
- LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
- MachineIRBuilder MIB(MI);
- // Memmove requires that we perform all of the loads before issuing any of
- // the stores. Apart from that, this loop does much the same thing as the
- // memcpy codegen function.
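- // E.g. an 8 byte copy from %p to %p + 4 lowered as interleaved load/store
- // pairs would clobber source bytes 4-7 before the second load reads them;
- // doing all the loads up front avoids that.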
- unsigned CurrOffset = 0;
- SmallVector<Register, 16> LoadVals;
- for (auto CopyTy : MemOps) {
- // Construct MMO for the load.
- auto *LoadMMO =
- MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
- // Create the load.
- Register LoadPtr = Src;
- if (CurrOffset != 0) {
- LLT SrcTy = MRI.getType(Src);
- auto Offset =
- MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
- LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
- }
- LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
- CurrOffset += CopyTy.getSizeInBytes();
- }
- CurrOffset = 0;
- for (unsigned I = 0; I < MemOps.size(); ++I) {
- LLT CopyTy = MemOps[I];
- // Now store the values loaded.
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
- Register StorePtr = Dst;
- if (CurrOffset != 0) {
- LLT DstTy = MRI.getType(Dst);
- auto Offset =
- MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
- StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
- }
- MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
- CurrOffset += CopyTy.getSizeInBytes();
- }
- MI.eraseFromParent();
- return Legalized;
- }
- LegalizerHelper::LegalizeResult
- LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
- const unsigned Opc = MI.getOpcode();
- // This combine is fairly complex so it's not written with a separate
- // matcher function.
- assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
- Opc == TargetOpcode::G_MEMSET) &&
- "Expected memcpy like instruction");
- auto MMOIt = MI.memoperands_begin();
- const MachineMemOperand *MemOp = *MMOIt;
- Align DstAlign = MemOp->getBaseAlign();
- Align SrcAlign;
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
- if (Opc != TargetOpcode::G_MEMSET) {
- assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
- MemOp = *(++MMOIt);
- SrcAlign = MemOp->getBaseAlign();
- }
- // See if this is a constant length copy
- auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
- if (!LenVRegAndVal)
- return UnableToLegalize;
- uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
- if (KnownLen == 0) {
- MI.eraseFromParent();
- return Legalized;
- }
- bool IsVolatile = MemOp->isVolatile();
- if (Opc == TargetOpcode::G_MEMCPY_INLINE)
- return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
- IsVolatile);
- // Don't try to optimize volatile.
- if (IsVolatile)
- return UnableToLegalize;
- if (MaxLen && KnownLen > MaxLen)
- return UnableToLegalize;
- if (Opc == TargetOpcode::G_MEMCPY) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- bool OptSize = shouldLowerMemFuncForSize(MF);
- uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
- return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
- IsVolatile);
- }
- if (Opc == TargetOpcode::G_MEMMOVE)
- return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (Opc == TargetOpcode::G_MEMSET)
- return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
- return UnableToLegalize;
- }
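- // A hypothetical sketch of how a target could route these opcodes here
- // (the predicate is simplified; real targets use their own legality rules):
- //   getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET})
- //       .customIf([](const LegalityQuery &Q) { return true; });
- // and then call lowerMemCpyFamily(MI, /*MaxLen=*/0) from the target's
- // LegalizerInfo::legalizeCustom() override.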