//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;
using namespace MIPatternMatch;

/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
/// with any leftover piece as type \p LeftoverTy.
///
/// Returns -1 in the first element of the pair if the breakdown is not
/// satisfiable.
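///
/// Illustrative example (not part of the original source): breaking down
/// OrigTy = s64 with NarrowTy = s16 yields {4, 0}; with NarrowTy = s24 it
/// yields two s24 parts plus one s16 leftover, i.e. {2, 1} with
/// LeftoverTy = s16.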
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  assert(Size > NarrowSize);

  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (NarrowTy.isVector()) {
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}

static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
  if (!Ty.isScalar())
    return nullptr;

  switch (Ty.getSizeInBits()) {
  case 16:
    return Type::getHalfTy(Ctx);
  case 32:
    return Type::getFloatTy(Ctx);
  case 64:
    return Type::getDoubleTy(Ctx);
  case 80:
    return Type::getX86_FP80Ty(Ctx);
  case 128:
    return Type::getFP128Ty(Ctx);
  default:
    return nullptr;
  }
}
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) {}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()) {}

LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  // Perform irregular split. Leftover is last element of RegPieces.
  if (MainTy.isVector()) {
    SmallVector<Register, 8> RegPieces;
    extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
    for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
      VRegs.push_back(RegPieces[i]);
    LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
    LeftoverTy = MRI.getType(LeftoverRegs[0]);
    return true;
  }

  LeftoverTy = LLT::scalar(LeftoverSize);
  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}
void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
                                         SmallVectorImpl<Register> &VRegs) {
  LLT RegTy = MRI.getType(Reg);
  assert(RegTy.isVector() && "Expected a vector type");

  LLT EltTy = RegTy.getElementType();
  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
  unsigned RegNumElts = RegTy.getNumElements();
  unsigned LeftoverNumElts = RegNumElts % NumElts;
  unsigned NumNarrowTyPieces = RegNumElts / NumElts;

  // Perfect split without leftover.
  if (LeftoverNumElts == 0)
    return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);

  // Irregular split. Unmerge to individual elements so the artifact combiner
  // has direct access to them, then build vectors with NumElts elements each.
  // The remaining element(s) are used to build the leftover piece.
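  //
  // Illustrative example (not from the original source): splitting a
  // <7 x s32> register with NumElts = 2 unmerges it into seven s32 values,
  // rebuilds three <2 x s32> pieces from the first six, and pushes the
  // remaining single s32 as the leftover.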
  SmallVector<Register, 8> Elts;
  extractParts(Reg, EltTy, RegNumElts, Elts);

  unsigned Offset = 0;
  // Requested sub-vectors of NarrowTy.
  for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
    ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
    VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
  }

  // Leftover element(s).
  if (LeftoverNumElts == 1) {
    VRegs.push_back(Elts[Offset]);
  } else {
    LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
    ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
    VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0));
  }
}
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  // Merge sub-vectors with different number of elements and insert into DstReg.
  if (ResultTy.isVector()) {
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    SmallVector<Register, 8> AllRegs;
    for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
      AllRegs.push_back(Reg);
    return mergeMixedSubvectors(DstReg, AllRegs);
  }

  SmallVector<Register> GCDRegs;
  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}

void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
                                       Register Reg) {
  LLT Ty = MRI.getType(Reg);
  SmallVector<Register, 8> RegElts;
  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
  Elts.append(RegElts);
}

/// Merge \p PartRegs with different types into \p DstReg.
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
                                           ArrayRef<Register> PartRegs) {
  SmallVector<Register, 8> AllElts;
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  Register Leftover = PartRegs[PartRegs.size() - 1];
  if (MRI.getType(Leftover).isScalar())
    AllElts.push_back(Leftover);
  else
    appendVectorElts(AllElts, Leftover);

  MIRBuilder.buildMerge(DstReg, AllElts);
}

/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}

void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                     LLT GCDTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy == GCDTy) {
    // If the source already evenly divides the result type, we don't need to
    // do anything.
    Parts.push_back(SrcReg);
  } else {
    // Need to split into common type sized pieces.
    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    getUnmergeResults(Parts, *Unmerge);
  }
}

LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                                    LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
  extractGCDType(Parts, GCDTy, SrcReg);
  return GCDTy;
}
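
// Illustrative note (not from the original source): with SrcTy = s64,
// NarrowTy = s32, and DstTy = s48, the GCD type works out to s16, so the s64
// source is unmerged into four s16 pieces from which both the narrow and
// destination types can be rebuilt.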
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can
  // reuse the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}
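
// Worked example (illustrative, not from the original source): for
// DstTy = s64, NarrowTy = s48, and GCDTy = s16, the LCM type is s192, so
// NumParts = 4 and NumSubParts = 3. If only four s16 sources cover the
// original s64, the remaining eight GCD-sized slots are filled with PadReg
// according to PadStrategy before the four s48 merges are built.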
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.

  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    MIRBuilder.buildUnmerge(UnmergeDefs,
                            MIRBuilder.buildMerge(LCMTy, RemergeRegs));
    return;
  }

  llvm_unreachable("unhandled case");
}

static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)
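
  // For example, RTLIBCASE(ADD_F) token-pastes the size onto the prefix,
  // returning RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, or
  // RTLIB::ADD_F128 depending on the bit width in Size.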
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(MachineInstr &MI,
                                    const TargetInstrInfo &TII,
                                    MachineRegisterInfo &MRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return or if we
  // have a `thisreturn` callee, and a sequence like:
  //
  //   G_MEMCPY %0, %1, %2
  //   $x0 = COPY %0
  //   RET_ReallyLR implicit $x0
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next != MBB.instr_end() && Next->isCopy()) {
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("unsupported opcode");
    case TargetOpcode::G_BZERO:
      return false;
    case TargetOpcode::G_MEMCPY:
    case TargetOpcode::G_MEMMOVE:
    case TargetOpcode::G_MEMSET:
      break;
    }

    Register VReg = MI.getOperand(0).getReg();
    if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
      return false;

    Register PReg = Next->getOperand(0).getReg();
    if (!PReg.isPhysical())
      return false;

    auto Ret = next_nodbg(Next, MBB.instr_end());
    if (Ret == MBB.instr_end() || !Ret->isReturn())
      return false;

    if (Ret->getNumImplicitOperands() != 1)
      return false;

    if (PReg != Ret->getOperand(0).getReg())
      return false;

    // Skip over the COPY that we just validated.
    Next = Ret;
  }

  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);
  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
  return createLibcall(MIRBuilder, Name, Result, Args, CC);
}
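
// Illustrative use (a sketch, not from the original source): emitting a call
// to the double-precision fmod routine for a G_FREM on s64 values might look
// like
//   createLibcall(MIRBuilder, RTLIB::REM_F64,
//                 {DstReg, Type::getDoubleTy(Ctx), 0},
//                 {{Src0Reg, Type::getDoubleTy(Ctx), 0},
//                  {Src1Reg, Type::getDoubleTy(Ctx), 0}});
// where DstReg, Src0Reg, Src1Reg, and Ctx are hypothetical values in scope.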
  565. // Useful for libcalls where all operands have the same type.
  566. static LegalizerHelper::LegalizeResult
  567. simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
  568. Type *OpType) {
  569. auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
  570. // FIXME: What does the original arg index mean here?
  571. SmallVector<CallLowering::ArgInfo, 3> Args;
  572. for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
  573. Args.push_back({MO.getReg(), OpType, 0});
  574. return createLibcall(MIRBuilder, Libcall,
  575. {MI.getOperand(0).getReg(), OpType, 0}, Args);
  576. }
  577. LegalizerHelper::LegalizeResult
  578. llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
  579. MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  580. auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
  581. SmallVector<CallLowering::ArgInfo, 3> Args;
  582. // Add all the args, except for the last which is an imm denoting 'tail'.
  583. for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
  584. Register Reg = MI.getOperand(i).getReg();
  585. // Need derive an IR type for call lowering.
  586. LLT OpLLT = MRI.getType(Reg);
  587. Type *OpTy = nullptr;
  588. if (OpLLT.isPointer())
  589. OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
  590. else
  591. OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
  592. Args.push_back({Reg, OpTy, 0});
  593. }
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
                    isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  return LegalizerHelper::Legalized;
}
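
// Map a floating-point conversion opcode and its source/destination IR types
// onto the corresponding RTLIB libcall enumerator.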
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), ToType, 0},
                       {{MI.getOperand(1).getReg(), FromType, 0}});
}

LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    LegalizeResult Result =
        createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
    if (Result != Legalized)
      return Result;
    MI.eraseFromParent();
    return Result;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // If SizeOp0 is not an exact multiple of NarrowSize, emit
    // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
    // FIXME: Although this would also be legal for the general case, it causes
    // a lot of regressions in the emitted code (superfluous COPYs, artifact
    // combines not being hit). This seems to be a problem related to the
    // artifact combiner.
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      if (DstTy.isVector())
        ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);

      Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
      MIRBuilder.buildAnyExt(DstReg, ImplicitReg);

      MI.eraseFromParent();
      return Legalized;
    }

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));

    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
          LeftoverTy,
          Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FREEZE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    // Should widen scalar first
    if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
      return UnableToLegalize;

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
    SmallVector<Register, 8> Parts;
    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
      Parts.push_back(
          MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
    }

    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    auto &LoadMI = cast<GLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      LoadMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    auto &LoadMI = cast<GExtLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = LoadMI.getMMO();
    unsigned MemSize = MMO.getSizeInBits();

    if (MemSize == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else if (MemSize < NarrowSize) {
      MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
    } else if (MemSize > NarrowSize) {
      // FIXME: Need to split the load.
      return UnableToLegalize;
    }

    if (isa<GZExtLoad>(LoadMI))
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    auto &StoreMI = cast<GStore>(MI);

    Register SrcReg = StoreMI.getValueReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
      StoreMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    //    B1, ..., BN = G_UNMERGE_VALUES B
    //    C1, ..., CN = G_UNMERGE_VALUES C
    //    A1 = BinOp<Ty/N> B1, C1
    //    ...
    //    AN = BinOp<Ty/N> BN, CN
    //    A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs(NumParts);
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2]);
    }
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    Register LHS = MI.getOperand(2).getReg();
    LLT SrcTy = MRI.getType(LHS);
    uint64_t SrcSize = SrcTy.getSizeInBits();
    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

    // TODO: Handle the non-equality case for weird sizes.
    if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
      return UnableToLegalize;

    LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
    SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
                      LHSLeftoverRegs))
      return UnableToLegalize;

    LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
    SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs))
      return UnableToLegalize;

    // We now have the LHS and RHS of the compare split into narrow-type
    // registers, plus potentially some leftover type.
    Register Dst = MI.getOperand(0).getReg();
    LLT ResTy = MRI.getType(Dst);
    if (ICmpInst::isEquality(Pred)) {
      // For each part on the LHS and RHS, keep track of the result of XOR-ing
      // them together. For each equal part, the result should be all 0s. For
      // each non-equal part, we'll get at least one 1.
      auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
      SmallVector<Register, 4> Xors;
      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
        Xors.push_back(Xor);
      }

      // Build a G_XOR for each leftover register. Each G_XOR must be widened
      // to the desired narrow type so that we can OR them together later.
      SmallVector<Register, 4> WidenedXors;
      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                            /* PadStrategy = */ TargetOpcode::G_ZEXT);
        Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
      }

      // Now, for each part we broke up, we know if they are equal/not equal
      // based off the G_XOR. We can OR these all together and compare against
      // 0 to get the result.
      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
      auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
      for (unsigned I = 2, E = Xors.size(); I < E; ++I)
        Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
      MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
    } else {
      // TODO: Handle non-power-of-two types.
      assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
      assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
      Register LHSL = LHSPartRegs[0];
      Register LHSH = LHSPartRegs[1];
      Register RHSL = RHSPartRegs[0];
      Register RHSH = RHSPartRegs[1];
      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpHEQ =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
      MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    int64_t SizeInBits = MI.getOperand(2).getImm();

    // So long as the new type has more bits than the bits we're extending we
    // don't need to break it apart.
    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
      Observer.changingInstr(MI);
      // We don't lose any non-extension bits by truncating the src and
      // sign-extending the dst.
      MachineOperand &MO1 = MI.getOperand(1);
      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
      MO1.setReg(TruncMIB.getReg(0));

      MachineOperand &MO2 = MI.getOperand(0);
      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
      MIRBuilder.buildSExt(MO2, DstExt);
      MO2.setReg(DstExt);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Break it apart. Components below the extension point are unmodified. The
    // component containing the extension point becomes a narrower SEXT_INREG.
    // Components above it are ashr'd from the component containing the
    // extension point.
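    //
    // For example, with NarrowTy = s32:
    //   %out:_(s64) = G_SEXT_INREG %in:_(s64), 24
    // becomes:
    //   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %in
    //   %lo2:_(s32) = G_SEXT_INREG %lo, 24
    //   %hi2:_(s32) = G_ASHR %lo2, 31
    //   %out:_(s64) = G_MERGE_VALUES %lo2, %hi2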
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<Register, 2> DstRegs;
    // List the registers where the source will be split.
    SmallVector<Register, 2> SrcRegs;

    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);

      SrcRegs.push_back(SrcReg);
    }

    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));

    Register AshrCstReg =
        MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
            .getReg(0);
    Register FullExtensionReg = 0;
    Register PartialExtensionReg = 0;

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i) {
      if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
        DstRegs.push_back(SrcRegs[i]);
      else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          DstRegs.push_back(FullExtensionReg);
          continue;
        }
        DstRegs.push_back(
            MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
                .getReg(0));
        FullExtensionReg = DstRegs.back();
      } else {
        DstRegs.push_back(
            MIRBuilder
                .buildInstr(
                    TargetOpcode::G_SEXT_INREG, {NarrowTy},
                    {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
                .getReg(0));
        PartialExtensionReg = DstRegs.back();
      }
    }

    // Gather the destination registers into the final destination.
    Register DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }

    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);

    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:
    return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_FPEXT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}
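
// Coerce Val to a plain scalar of the same bit width, inserting ptrtoint or
// bitcast instructions as needed. Returns an invalid Register for pointers
// in a non-integral address space, which cannot be cast to an integer.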
Register LegalizerHelper::coerceToScalar(Register Val) {
  LLT Ty = MRI.getType(Val);
  if (Ty.isScalar())
    return Val;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
  if (Ty.isPointer()) {
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      return Register();
    return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
  }

  Register NewVal = Val;

  assert(Ty.isVector());
  LLT EltTy = Ty.getElementType();
  if (EltTy.isPointer())
    NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
}
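
// Extend source operand OpIdx of MI to WideTy using the extension opcode
// ExtOpcode, and rewrite the operand in place to use the extended register.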
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
  MO.setReg(ExtB.getReg(0));
}
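
// Truncate source operand OpIdx of MI to NarrowTy, and rewrite the operand
// in place to use the truncated register.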
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
  MO.setReg(ExtB.getReg(0));
}
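
// Widen def operand OpIdx of MI to WideTy: the def is rewritten to a fresh
// WideTy register, and an instruction with TruncOpcode is emitted after MI
// to recover the original narrow value.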
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
  MO.setReg(DstExt);
}
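
// Narrow def operand OpIdx of MI to NarrowTy: the def is rewritten to a
// fresh NarrowTy register, and an instruction with ExtOpcode is emitted
// after MI to rebuild the original wide value.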
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
  MO.setReg(DstTrunc);
}
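
// Grow def operand OpIdx of MI to the vector type WideTy; the extra trailing
// elements of the wide def are deleted afterwards to produce the original
// narrower vector.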
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  Register Dst = MO.getReg();
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MO.setReg(DstExt);
  MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
}
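
// Grow source operand OpIdx of MI to the vector type MoreTy by padding it
// with undef elements.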
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  SmallVector<Register, 8> Regs;
  MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
}

void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &Op = MI.getOperand(OpIdx);
  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
}

void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildBitcast(MO, CastDst);
  MO.setReg(CastDst);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    Register SrcReg = MO.getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide
  // the original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}

Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
  Register WideReg = MRI.createGenericVirtualRegister(WideTy);
  LLT OrigTy = MRI.getType(OrigReg);
  LLT LCMTy = getLCMType(WideTy, OrigTy);

  const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
  const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();

  Register UnmergeSrc = WideReg;

  // Create a merge to the LCM type, padding with undef
  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
  // =>
  // %1:_(<4 x s32>) = G_FOO
  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
  if (NumMergeParts > 1) {
    Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
    SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
    MergeParts[0] = WideReg;
    UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
  }

  // Unmerge to the original register and pad with dead defs.
  SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
  UnmergeResults[0] = OrigReg;
  for (int I = 1; I != NumUnmergeParts; ++I)
    UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);

  MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
  return WideReg;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts.
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type.
    unsigned DstSize = DstTy.getSizeInBits();

    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since
  // we may have widened the source, we will need to pad the results with
  // dead defs to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  //  %4:_(s192) = G_ANYEXT %0:_(s96)
  //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  ; unpack to GCD type, with extra dead defs
  //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  //  dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
  //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
  //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, DstTy);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();

  // Directly unmerge to the destination without going through a GCD type
  // if possible
  if (PartsPerRemerge == 1) {
    const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else {
          // Create dead def for excess components.
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
        }
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    auto LShr = MIRBuilder.buildLShr(
        ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0 || WideTy.isVector())
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize; // TODO

  unsigned Opcode;
  unsigned ExtOpcode;
  Optional<Register> CarryIn = None;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }

  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
  // Do the arithmetic in the larger type.
  Register NewOp;
  if (CarryIn) {
    LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
    NewOp = MIRBuilder
                .buildInstr(Opcode, {WideTy, CarryOutTy},
                            {LHSExt, RHSExt, *CarryIn})
                .getReg(0);
  } else {
    NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
  }
  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
  // There is no overflow if the ExtOp is the same as NewOp.
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
  // Now trunc the NewOp to the original result.
  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;

  // We can convert this to:
  //   1. Any extend iN to iM
  //   2. SHL by M-N
  //   3. [US][ADD|SUB|SHL]SAT
  //   4. L/ASHR by M-N
  //
  // It may be more efficient to lower this to a min and a max operation in
  // the higher precision arithmetic if the promoted operation isn't legal,
  // but this decision is up to the target's lowering request.
  Register DstReg = MI.getOperand(0).getReg();
  unsigned NewBits = WideTy.getScalarSizeInBits();
  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();

  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
  // must not left shift the RHS to preserve the shift amount.
  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
                     : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);

  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
                                        {ShiftL, ShiftR}, MI.getFlags());

  // Use a shift that will preserve the number of sign bits when the trunc is
  // folded away.
  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
                         : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);

  MIRBuilder.buildTrunc(DstReg, Result);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  if (TypeIdx == 1)
    return UnableToLegalize;

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
  Register Result = MI.getOperand(0).getReg();
  Register OriginalOverflow = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(LHS);
  LLT OverflowTy = MRI.getType(OriginalOverflow);
  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();

  // To determine if the result overflowed in the larger type, we extend the
  // input to the larger type, do the multiply (checking if it overflows),
  // then also check the high bits of the result to see if overflow happened
  // there.
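  //
  // For example, a s16 G_UMULO widened to s32 zero-extends both operands and
  // multiplies in s32. Since s32 can hold any product of two 16-bit values,
  // the wide multiply cannot itself overflow, and the overflow flag reduces
  // to checking whether the high 16 bits of the product are nonzero, i.e.
  // whether the product still zero-extends from s16.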
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});

  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
                                    {LeftOperand, RightOperand});
  auto Mul = Mulo->getOperand(0);
  MIRBuilder.buildTrunc(Result, Mul);

  MachineInstrBuilder ExtResult;
  // Overflow occurred if it occurred in the larger type, or if the high part
  // of the result does not zero/sign-extend the low part. Check this second
  // possibility first.
  if (IsSigned) {
    // For signed, overflow occurred when the high part does not sign-extend
    // the low part.
    ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
  } else {
    // Unsigned overflow occurred when the high part does not zero-extend the
    // low part.
    ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
  }

  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
  // so we don't need to check the overflow result of larger type Mulo.
  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
    auto Overflow =
        MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
    // Finally check if the multiplication in the larger type itself
    // overflowed.
    MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
  } else {
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
  }
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
    assert(TypeIdx == 0 && "atomicrmw with second scalar type");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }
    assert(TypeIdx == 1 &&
           "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First extend the input.
    unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
                              MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
                          ? TargetOpcode::G_ANYEXT
                          : TargetOpcode::G_ZEXT;
    auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
    LLT CurTy = MRI.getType(SrcReg);
    unsigned NewOpc = MI.getOpcode();
    if (NewOpc == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
          WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
      // Now we know the operand is non-zero, use the more relaxed opcode.
      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildSub(
          WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  1904. case TargetOpcode::G_BSWAP: {
  1905. Observer.changingInstr(MI);
  1906. Register DstReg = MI.getOperand(0).getReg();
  1907. Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
  1908. Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  1909. Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
  1910. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  1911. MI.getOperand(0).setReg(DstExt);
  1912. MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  1913. LLT Ty = MRI.getType(DstReg);
  1914. unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
  1915. MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
  1916. MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
  1917. MIRBuilder.buildTrunc(DstReg, ShrReg);
  1918. Observer.changedInstr(MI);
  1919. return Legalized;
  1920. }
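// Both G_BSWAP above and G_BITREVERSE below leave the interesting bits in
// the high end of the wide register, so a logical shift right by the width
// difference recovers them. A sketch, assuming an s16 bswap done in s32:
// bswap32 of the any-extended value puts the two meaningful bytes in bits
// [31:16], and G_TRUNC (G_LSHR %bswap32, 16) yields bswap16 of the source.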
  1921. case TargetOpcode::G_BITREVERSE: {
  1922. Observer.changingInstr(MI);
  1923. Register DstReg = MI.getOperand(0).getReg();
  1924. LLT Ty = MRI.getType(DstReg);
  1925. unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
  1926. Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  1927. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  1928. MI.getOperand(0).setReg(DstExt);
  1929. MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  1930. auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
  1931. auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
  1932. MIRBuilder.buildTrunc(DstReg, Shift);
  1933. Observer.changedInstr(MI);
  1934. return Legalized;
  1935. }
  1936. case TargetOpcode::G_FREEZE:
  1937. Observer.changingInstr(MI);
  1938. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  1939. widenScalarDst(MI, WideTy);
  1940. Observer.changedInstr(MI);
  1941. return Legalized;
  1942. case TargetOpcode::G_ABS:
  1943. Observer.changingInstr(MI);
  1944. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
  1945. widenScalarDst(MI, WideTy);
  1946. Observer.changedInstr(MI);
  1947. return Legalized;
  1948. case TargetOpcode::G_ADD:
  1949. case TargetOpcode::G_AND:
  1950. case TargetOpcode::G_MUL:
  1951. case TargetOpcode::G_OR:
  1952. case TargetOpcode::G_XOR:
  1953. case TargetOpcode::G_SUB:
// Perform the operation at the larger width (any extension is fine here;
// high bits don't affect the result) and then truncate the result back to
// the original type.
  1957. Observer.changingInstr(MI);
  1958. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  1959. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
  1960. widenScalarDst(MI, WideTy);
  1961. Observer.changedInstr(MI);
  1962. return Legalized;
  1963. case TargetOpcode::G_SBFX:
  1964. case TargetOpcode::G_UBFX:
  1965. Observer.changingInstr(MI);
  1966. if (TypeIdx == 0) {
  1967. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  1968. widenScalarDst(MI, WideTy);
  1969. } else {
  1970. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
  1971. widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
  1972. }
  1973. Observer.changedInstr(MI);
  1974. return Legalized;
  1975. case TargetOpcode::G_SHL:
  1976. Observer.changingInstr(MI);
  1977. if (TypeIdx == 0) {
  1978. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  1979. widenScalarDst(MI, WideTy);
  1980. } else {
  1981. assert(TypeIdx == 1);
  1982. // The "number of bits to shift" operand must preserve its value as an
  1983. // unsigned integer:
  1984. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
  1985. }
  1986. Observer.changedInstr(MI);
  1987. return Legalized;
  1988. case TargetOpcode::G_SDIV:
  1989. case TargetOpcode::G_SREM:
  1990. case TargetOpcode::G_SMIN:
  1991. case TargetOpcode::G_SMAX:
  1992. Observer.changingInstr(MI);
  1993. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
  1994. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
  1995. widenScalarDst(MI, WideTy);
  1996. Observer.changedInstr(MI);
  1997. return Legalized;
  1998. case TargetOpcode::G_SDIVREM:
  1999. Observer.changingInstr(MI);
  2000. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
  2001. widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
  2002. widenScalarDst(MI, WideTy);
  2003. widenScalarDst(MI, WideTy, 1);
  2004. Observer.changedInstr(MI);
  2005. return Legalized;
  2006. case TargetOpcode::G_ASHR:
  2007. case TargetOpcode::G_LSHR:
  2008. Observer.changingInstr(MI);
  2009. if (TypeIdx == 0) {
  2010. unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
  2011. TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  2012. widenScalarSrc(MI, WideTy, 1, CvtOp);
  2013. widenScalarDst(MI, WideTy);
  2014. } else {
  2015. assert(TypeIdx == 1);
  2016. // The "number of bits to shift" operand must preserve its value as an
  2017. // unsigned integer:
  2018. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
  2019. }
  2020. Observer.changedInstr(MI);
  2021. return Legalized;
  2022. case TargetOpcode::G_UDIV:
  2023. case TargetOpcode::G_UREM:
  2024. case TargetOpcode::G_UMIN:
  2025. case TargetOpcode::G_UMAX:
  2026. Observer.changingInstr(MI);
  2027. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
  2028. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
  2029. widenScalarDst(MI, WideTy);
  2030. Observer.changedInstr(MI);
  2031. return Legalized;
  2032. case TargetOpcode::G_UDIVREM:
  2033. Observer.changingInstr(MI);
  2034. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
  2035. widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
  2036. widenScalarDst(MI, WideTy);
  2037. widenScalarDst(MI, WideTy, 1);
  2038. Observer.changedInstr(MI);
  2039. return Legalized;
  2040. case TargetOpcode::G_SELECT:
  2041. Observer.changingInstr(MI);
  2042. if (TypeIdx == 0) {
  2043. // Perform operation at larger width (any extension is fine here, high
  2044. // bits don't affect the result) and then truncate the result back to the
  2045. // original type.
  2046. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
  2047. widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
  2048. widenScalarDst(MI, WideTy);
  2049. } else {
  2050. bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
  2051. // Explicit extension is required here since high bits affect the result.
  2052. widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
  2053. }
  2054. Observer.changedInstr(MI);
  2055. return Legalized;
  2056. case TargetOpcode::G_FPTOSI:
  2057. case TargetOpcode::G_FPTOUI:
  2058. Observer.changingInstr(MI);
  2059. if (TypeIdx == 0)
  2060. widenScalarDst(MI, WideTy);
  2061. else
  2062. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
  2063. Observer.changedInstr(MI);
  2064. return Legalized;
  2065. case TargetOpcode::G_SITOFP:
  2066. Observer.changingInstr(MI);
  2067. if (TypeIdx == 0)
  2068. widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
  2069. else
  2070. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
  2071. Observer.changedInstr(MI);
  2072. return Legalized;
  2073. case TargetOpcode::G_UITOFP:
  2074. Observer.changingInstr(MI);
  2075. if (TypeIdx == 0)
  2076. widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
  2077. else
  2078. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
  2079. Observer.changedInstr(MI);
  2080. return Legalized;
  2081. case TargetOpcode::G_LOAD:
  2082. case TargetOpcode::G_SEXTLOAD:
  2083. case TargetOpcode::G_ZEXTLOAD:
  2084. Observer.changingInstr(MI);
  2085. widenScalarDst(MI, WideTy);
  2086. Observer.changedInstr(MI);
  2087. return Legalized;
  2088. case TargetOpcode::G_STORE: {
  2089. if (TypeIdx != 0)
  2090. return UnableToLegalize;
  2091. LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  2092. if (!Ty.isScalar())
  2093. return UnableToLegalize;
  2094. Observer.changingInstr(MI);
  2095. unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
  2096. TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
  2097. widenScalarSrc(MI, WideTy, 0, ExtType);
  2098. Observer.changedInstr(MI);
  2099. return Legalized;
  2100. }
  2101. case TargetOpcode::G_CONSTANT: {
  2102. MachineOperand &SrcMO = MI.getOperand(1);
  2103. LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
  2104. unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
  2105. MRI.getType(MI.getOperand(0).getReg()));
  2106. assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
  2107. ExtOpc == TargetOpcode::G_ANYEXT) &&
  2108. "Illegal Extend");
  2109. const APInt &SrcVal = SrcMO.getCImm()->getValue();
  2110. const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
  2111. ? SrcVal.sext(WideTy.getSizeInBits())
  2112. : SrcVal.zext(WideTy.getSizeInBits());
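// A sketch of the choice: widening an s8 G_CONSTANT -1 to s32 with G_SEXT
// keeps the immediate -1, while G_ZEXT turns it into 255; the target hook
// above picks whichever form the target handles best.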
  2113. Observer.changingInstr(MI);
  2114. SrcMO.setCImm(ConstantInt::get(Ctx, Val));
  2115. widenScalarDst(MI, WideTy);
  2116. Observer.changedInstr(MI);
  2117. return Legalized;
  2118. }
  2119. case TargetOpcode::G_FCONSTANT: {
  2120. MachineOperand &SrcMO = MI.getOperand(1);
  2121. LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
  2122. APFloat Val = SrcMO.getFPImm()->getValueAPF();
  2123. bool LosesInfo;
  2124. switch (WideTy.getSizeInBits()) {
  2125. case 32:
  2126. Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
  2127. &LosesInfo);
  2128. break;
  2129. case 64:
  2130. Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
  2131. &LosesInfo);
  2132. break;
  2133. default:
  2134. return UnableToLegalize;
  2135. }
  2136. assert(!LosesInfo && "extend should always be lossless");
  2137. Observer.changingInstr(MI);
  2138. SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
  2139. widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
  2140. Observer.changedInstr(MI);
  2141. return Legalized;
  2142. }
  2143. case TargetOpcode::G_IMPLICIT_DEF: {
  2144. Observer.changingInstr(MI);
  2145. widenScalarDst(MI, WideTy);
  2146. Observer.changedInstr(MI);
  2147. return Legalized;
  2148. }
  2149. case TargetOpcode::G_BRCOND:
  2150. Observer.changingInstr(MI);
  2151. widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
  2152. Observer.changedInstr(MI);
  2153. return Legalized;
  2154. case TargetOpcode::G_FCMP:
  2155. Observer.changingInstr(MI);
  2156. if (TypeIdx == 0)
  2157. widenScalarDst(MI, WideTy);
  2158. else {
  2159. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
  2160. widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
  2161. }
  2162. Observer.changedInstr(MI);
  2163. return Legalized;
  2164. case TargetOpcode::G_ICMP:
  2165. Observer.changingInstr(MI);
  2166. if (TypeIdx == 0)
  2167. widenScalarDst(MI, WideTy);
  2168. else {
  2169. unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
  2170. MI.getOperand(1).getPredicate()))
  2171. ? TargetOpcode::G_SEXT
  2172. : TargetOpcode::G_ZEXT;
  2173. widenScalarSrc(MI, WideTy, 2, ExtOpcode);
  2174. widenScalarSrc(MI, WideTy, 3, ExtOpcode);
  2175. }
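// E.g. (a sketch) an s8 signed compare widened to s32: G_SEXT preserves the
// ordering (-1 stays below 0), whereas G_ZEXT would map -1 to 255 and flip
// the outcome of a signed predicate.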
  2176. Observer.changedInstr(MI);
  2177. return Legalized;
  2178. case TargetOpcode::G_PTR_ADD:
  2179. assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
  2180. Observer.changingInstr(MI);
  2181. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
  2182. Observer.changedInstr(MI);
  2183. return Legalized;
  2184. case TargetOpcode::G_PHI: {
  2185. assert(TypeIdx == 0 && "Expecting only Idx 0");
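// A sketch of the approach for an s8 G_PHI widened to s32: any-extend each
// incoming value at the end of its predecessor block, PHI the wide values,
// and truncate once after the PHI group:
//   pred: %inc.wide:_(s32) = G_ANYEXT %inc:_(s8)
//   here: %phi.wide:_(s32) = G_PHI %inc.wide(%pred), ...
//         %dst:_(s8) = G_TRUNC %phi.wide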
  2186. Observer.changingInstr(MI);
  2187. for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
  2188. MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
  2189. MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
  2190. widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
  2191. }
  2192. MachineBasicBlock &MBB = *MI.getParent();
  2193. MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  2194. widenScalarDst(MI, WideTy);
  2195. Observer.changedInstr(MI);
  2196. return Legalized;
  2197. }
  2198. case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
  2199. if (TypeIdx == 0) {
  2200. Register VecReg = MI.getOperand(1).getReg();
  2201. LLT VecTy = MRI.getType(VecReg);
  2202. Observer.changingInstr(MI);
  2203. widenScalarSrc(
  2204. MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
  2205. TargetOpcode::G_ANYEXT);
  2206. widenScalarDst(MI, WideTy, 0);
  2207. Observer.changedInstr(MI);
  2208. return Legalized;
  2209. }
  2210. if (TypeIdx != 2)
  2211. return UnableToLegalize;
  2212. Observer.changingInstr(MI);
  2213. // TODO: Probably should be zext
  2214. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
  2215. Observer.changedInstr(MI);
  2216. return Legalized;
  2217. }
  2218. case TargetOpcode::G_INSERT_VECTOR_ELT: {
  2219. if (TypeIdx == 1) {
  2220. Observer.changingInstr(MI);
  2221. Register VecReg = MI.getOperand(1).getReg();
  2222. LLT VecTy = MRI.getType(VecReg);
  2223. LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
  2224. widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
  2225. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
  2226. widenScalarDst(MI, WideVecTy, 0);
  2227. Observer.changedInstr(MI);
  2228. return Legalized;
  2229. }
  2230. if (TypeIdx == 2) {
  2231. Observer.changingInstr(MI);
  2232. // TODO: Probably should be zext
  2233. widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
  2234. Observer.changedInstr(MI);
  2235. return Legalized;
  2236. }
  2237. return UnableToLegalize;
  2238. }
  2239. case TargetOpcode::G_FADD:
  2240. case TargetOpcode::G_FMUL:
  2241. case TargetOpcode::G_FSUB:
  2242. case TargetOpcode::G_FMA:
  2243. case TargetOpcode::G_FMAD:
  2244. case TargetOpcode::G_FNEG:
  2245. case TargetOpcode::G_FABS:
  2246. case TargetOpcode::G_FCANONICALIZE:
  2247. case TargetOpcode::G_FMINNUM:
  2248. case TargetOpcode::G_FMAXNUM:
  2249. case TargetOpcode::G_FMINNUM_IEEE:
  2250. case TargetOpcode::G_FMAXNUM_IEEE:
  2251. case TargetOpcode::G_FMINIMUM:
  2252. case TargetOpcode::G_FMAXIMUM:
  2253. case TargetOpcode::G_FDIV:
  2254. case TargetOpcode::G_FREM:
  2255. case TargetOpcode::G_FCEIL:
  2256. case TargetOpcode::G_FFLOOR:
  2257. case TargetOpcode::G_FCOS:
  2258. case TargetOpcode::G_FSIN:
  2259. case TargetOpcode::G_FLOG10:
  2260. case TargetOpcode::G_FLOG:
  2261. case TargetOpcode::G_FLOG2:
  2262. case TargetOpcode::G_FRINT:
  2263. case TargetOpcode::G_FNEARBYINT:
  2264. case TargetOpcode::G_FSQRT:
  2265. case TargetOpcode::G_FEXP:
  2266. case TargetOpcode::G_FEXP2:
  2267. case TargetOpcode::G_FPOW:
  2268. case TargetOpcode::G_INTRINSIC_TRUNC:
  2269. case TargetOpcode::G_INTRINSIC_ROUND:
  2270. case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  2271. assert(TypeIdx == 0);
  2272. Observer.changingInstr(MI);
  2273. for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
  2274. widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
  2275. widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
  2276. Observer.changedInstr(MI);
  2277. return Legalized;
  2278. case TargetOpcode::G_FPOWI: {
  2279. if (TypeIdx != 0)
  2280. return UnableToLegalize;
  2281. Observer.changingInstr(MI);
  2282. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
  2283. widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
  2284. Observer.changedInstr(MI);
  2285. return Legalized;
  2286. }
  2287. case TargetOpcode::G_INTTOPTR:
  2288. if (TypeIdx != 1)
  2289. return UnableToLegalize;
  2290. Observer.changingInstr(MI);
  2291. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
  2292. Observer.changedInstr(MI);
  2293. return Legalized;
  2294. case TargetOpcode::G_PTRTOINT:
  2295. if (TypeIdx != 0)
  2296. return UnableToLegalize;
  2297. Observer.changingInstr(MI);
  2298. widenScalarDst(MI, WideTy, 0);
  2299. Observer.changedInstr(MI);
  2300. return Legalized;
  2301. case TargetOpcode::G_BUILD_VECTOR: {
  2302. Observer.changingInstr(MI);
  2303. const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
  2304. for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
  2305. widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
  2306. // Avoid changing the result vector type if the source element type was
  2307. // requested.
  2308. if (TypeIdx == 1) {
  2309. MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
  2310. } else {
  2311. widenScalarDst(MI, WideTy, 0);
  2312. }
  2313. Observer.changedInstr(MI);
  2314. return Legalized;
  2315. }
  2316. case TargetOpcode::G_SEXT_INREG:
  2317. if (TypeIdx != 0)
  2318. return UnableToLegalize;
  2319. Observer.changingInstr(MI);
  2320. widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  2321. widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
  2322. Observer.changedInstr(MI);
  2323. return Legalized;
  2324. case TargetOpcode::G_PTRMASK: {
  2325. if (TypeIdx != 1)
  2326. return UnableToLegalize;
  2327. Observer.changingInstr(MI);
  2328. widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
  2329. Observer.changedInstr(MI);
  2330. return Legalized;
  2331. }
  2332. }
  2333. }
  2334. static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
  2335. MachineIRBuilder &B, Register Src, LLT Ty) {
  2336. auto Unmerge = B.buildUnmerge(Ty, Src);
  2337. for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
  2338. Pieces.push_back(Unmerge.getReg(I));
  2339. }
  2340. LegalizerHelper::LegalizeResult
  2341. LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  2342. Register Dst = MI.getOperand(0).getReg();
  2343. Register Src = MI.getOperand(1).getReg();
  2344. LLT DstTy = MRI.getType(Dst);
  2345. LLT SrcTy = MRI.getType(Src);
  2346. if (SrcTy.isVector()) {
  2347. LLT SrcEltTy = SrcTy.getElementType();
  2348. SmallVector<Register, 8> SrcRegs;
  2349. if (DstTy.isVector()) {
  2350. int NumDstElt = DstTy.getNumElements();
  2351. int NumSrcElt = SrcTy.getNumElements();
  2352. LLT DstEltTy = DstTy.getElementType();
  2353. LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
  2354. LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
  2355. // If there's an element size mismatch, insert intermediate casts to match
  2356. // the result element type.
  2357. if (NumSrcElt < NumDstElt) { // Source element type is larger.
  2358. // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
  2359. //
  2360. // =>
  2361. //
// %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
// %4:_(<2 x s8>) = G_BITCAST %2
// %5:_(<2 x s8>) = G_BITCAST %3
// %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
  2366. DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
  2367. SrcPartTy = SrcEltTy;
  2368. } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
  2369. //
  2370. // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
  2371. //
  2372. // =>
  2373. //
// %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
// %4:_(s16) = G_BITCAST %2
// %5:_(s16) = G_BITCAST %3
// %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
  2378. SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
  2379. DstCastTy = DstEltTy;
  2380. }
  2381. getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
  2382. for (Register &SrcReg : SrcRegs)
  2383. SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
  2384. } else
  2385. getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
  2386. MIRBuilder.buildMerge(Dst, SrcRegs);
  2387. MI.eraseFromParent();
  2388. return Legalized;
  2389. }
  2390. if (DstTy.isVector()) {
  2391. SmallVector<Register, 8> SrcRegs;
  2392. getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
  2393. MIRBuilder.buildMerge(Dst, SrcRegs);
  2394. MI.eraseFromParent();
  2395. return Legalized;
  2396. }
  2397. return UnableToLegalize;
  2398. }
  2399. /// Figure out the bit offset into a register when coercing a vector index for
  2400. /// the wide element type. This is only for the case when promoting vector to
  2401. /// one with larger elements.
  2402. //
  2403. ///
  2404. /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
  2405. /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
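///
/// A worked example (a sketch): viewing <4 x s32> as <2 x s64> gives
/// NewEltSize = 64 and OldEltSize = 32, so %offset_idx = %idx & 1 and
/// %offset_bits = %offset_idx << 5, i.e. the target element starts 0 or 32
/// bits into the wide element.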
  2406. static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
  2407. Register Idx,
  2408. unsigned NewEltSize,
  2409. unsigned OldEltSize) {
  2410. const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  2411. LLT IdxTy = B.getMRI()->getType(Idx);
  2412. // Now figure out the amount we need to shift to get the target bits.
  2413. auto OffsetMask = B.buildConstant(
  2414. IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
  2415. auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  2416. return B.buildShl(IdxTy, OffsetIdx,
  2417. B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
  2418. }
  2419. /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
  2420. /// is casting to a vector with a smaller element size, perform multiple element
  2421. /// extracts and merge the results. If this is coercing to a vector with larger
  2422. /// elements, index the bitcasted vector and extract the target element with bit
  2423. /// operations. This is intended to force the indexing in the native register
  2424. /// size for architectures that can dynamically index the register file.
  2425. LegalizerHelper::LegalizeResult
  2426. LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
  2427. LLT CastTy) {
  2428. if (TypeIdx != 1)
  2429. return UnableToLegalize;
  2430. Register Dst = MI.getOperand(0).getReg();
  2431. Register SrcVec = MI.getOperand(1).getReg();
  2432. Register Idx = MI.getOperand(2).getReg();
  2433. LLT SrcVecTy = MRI.getType(SrcVec);
  2434. LLT IdxTy = MRI.getType(Idx);
  2435. LLT SrcEltTy = SrcVecTy.getElementType();
  2436. unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  2437. unsigned OldNumElts = SrcVecTy.getNumElements();
  2438. LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  2439. Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  2440. const unsigned NewEltSize = NewEltTy.getSizeInBits();
  2441. const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  2442. if (NewNumElts > OldNumElts) {
  2443. // Decreasing the vector element size
  2444. //
  2445. // e.g. i64 = extract_vector_elt x:v2i64, y:i32
  2446. // =>
  2447. // v4i32:castx = bitcast x:v2i64
  2448. //
  2449. // i64 = bitcast
// (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
//                     (i32 (extract_vector_elt castx, (2 * y + 1))))
  2452. //
  2453. if (NewNumElts % OldNumElts != 0)
  2454. return UnableToLegalize;
  2455. // Type of the intermediate result vector.
  2456. const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
  2457. LLT MidTy =
  2458. LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
  2459. auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
  2460. SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
  2461. auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
  2462. for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
  2463. auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
  2464. auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
  2465. auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
  2466. NewOps[I] = Elt.getReg(0);
  2467. }
  2468. auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
  2469. MIRBuilder.buildBitcast(Dst, NewVec);
  2470. MI.eraseFromParent();
  2471. return Legalized;
  2472. }
  2473. if (NewNumElts < OldNumElts) {
  2474. if (NewEltSize % OldEltSize != 0)
  2475. return UnableToLegalize;
  2476. // This only depends on powers of 2 because we use bit tricks to figure out
  2477. // the bit offset we need to shift to get the target element. A general
  2478. // expansion could emit division/multiply.
  2479. if (!isPowerOf2_32(NewEltSize / OldEltSize))
  2480. return UnableToLegalize;
  2481. // Increasing the vector element size.
  2482. // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
  2483. //
  2484. // =>
  2485. //
  2486. // %cast = G_BITCAST %vec
  2487. // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
  2488. // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
  2489. // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
  2490. // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
  2491. // %elt_bits = G_LSHR %wide_elt, %offset_bits
  2492. // %elt = G_TRUNC %elt_bits
  2493. const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  2494. auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
  2495. // Divide to get the index in the wider element type.
  2496. auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
  2497. Register WideElt = CastVec;
  2498. if (CastTy.isVector()) {
  2499. WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
  2500. ScaledIdx).getReg(0);
  2501. }
  2502. // Compute the bit offset into the register of the target element.
  2503. Register OffsetBits = getBitcastWiderVectorElementOffset(
  2504. MIRBuilder, Idx, NewEltSize, OldEltSize);
  2505. // Shift the wide element to get the target element.
  2506. auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
  2507. MIRBuilder.buildTrunc(Dst, ExtractedBits);
  2508. MI.eraseFromParent();
  2509. return Legalized;
  2510. }
  2511. return UnableToLegalize;
  2512. }
/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits,
/// while preserving the other bits in \p TargetReg:
///
/// (TargetReg & ~(~(-1 << InsertReg.size()) << OffsetBits)) |
///   (ZExt(InsertReg) << OffsetBits)
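///
/// A worked example (a sketch): inserting an s8 value into an s32 target at
/// OffsetBits = 16 builds the mask 0x00FF0000, clears those bits in the
/// target, and ORs in (ZExt(InsertReg) << 16).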
  2517. static Register buildBitFieldInsert(MachineIRBuilder &B,
  2518. Register TargetReg, Register InsertReg,
  2519. Register OffsetBits) {
  2520. LLT TargetTy = B.getMRI()->getType(TargetReg);
  2521. LLT InsertTy = B.getMRI()->getType(InsertReg);
  2522. auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  2523. auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
  2524. // Produce a bitmask of the value to insert
  2525. auto EltMask = B.buildConstant(
  2526. TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
  2527. InsertTy.getSizeInBits()));
  2528. // Shift it into position
  2529. auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  2530. auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
  2531. // Clear out the bits in the wide element
  2532. auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
  2533. // The value to insert has all zeros already, so stick it into the masked
  2534. // wide element.
  2535. return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
  2536. }
  2537. /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
  2538. /// is increasing the element size, perform the indexing in the target element
  2539. /// type, and use bit operations to insert at the element position. This is
  2540. /// intended for architectures that can dynamically index the register file and
  2541. /// want to force indexing in the native register size.
  2542. LegalizerHelper::LegalizeResult
  2543. LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
  2544. LLT CastTy) {
  2545. if (TypeIdx != 0)
  2546. return UnableToLegalize;
  2547. Register Dst = MI.getOperand(0).getReg();
  2548. Register SrcVec = MI.getOperand(1).getReg();
  2549. Register Val = MI.getOperand(2).getReg();
  2550. Register Idx = MI.getOperand(3).getReg();
  2551. LLT VecTy = MRI.getType(Dst);
  2552. LLT IdxTy = MRI.getType(Idx);
  2553. LLT VecEltTy = VecTy.getElementType();
  2554. LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  2555. const unsigned NewEltSize = NewEltTy.getSizeInBits();
  2556. const unsigned OldEltSize = VecEltTy.getSizeInBits();
  2557. unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  2558. unsigned OldNumElts = VecTy.getNumElements();
  2559. Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  2560. if (NewNumElts < OldNumElts) {
  2561. if (NewEltSize % OldEltSize != 0)
  2562. return UnableToLegalize;
  2563. // This only depends on powers of 2 because we use bit tricks to figure out
  2564. // the bit offset we need to shift to get the target element. A general
  2565. // expansion could emit division/multiply.
  2566. if (!isPowerOf2_32(NewEltSize / OldEltSize))
  2567. return UnableToLegalize;
  2568. const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  2569. auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
  2570. // Divide to get the index in the wider element type.
  2571. auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
  2572. Register ExtractedElt = CastVec;
  2573. if (CastTy.isVector()) {
  2574. ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
  2575. ScaledIdx).getReg(0);
  2576. }
  2577. // Compute the bit offset into the register of the target element.
  2578. Register OffsetBits = getBitcastWiderVectorElementOffset(
  2579. MIRBuilder, Idx, NewEltSize, OldEltSize);
  2580. Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
  2581. Val, OffsetBits);
  2582. if (CastTy.isVector()) {
  2583. InsertedElt = MIRBuilder.buildInsertVectorElement(
  2584. CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
  2585. }
  2586. MIRBuilder.buildBitcast(Dst, InsertedElt);
  2587. MI.eraseFromParent();
  2588. return Legalized;
  2589. }
  2590. return UnableToLegalize;
  2591. }
  2592. LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
  2593. // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  2594. Register DstReg = LoadMI.getDstReg();
  2595. Register PtrReg = LoadMI.getPointerReg();
  2596. LLT DstTy = MRI.getType(DstReg);
  2597. MachineMemOperand &MMO = LoadMI.getMMO();
  2598. LLT MemTy = MMO.getMemoryType();
  2599. MachineFunction &MF = MIRBuilder.getMF();
  2600. unsigned MemSizeInBits = MemTy.getSizeInBits();
  2601. unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
  2602. if (MemSizeInBits != MemStoreSizeInBits) {
  2603. if (MemTy.isVector())
  2604. return UnableToLegalize;
  2605. // Promote to a byte-sized load if not loading an integral number of
  2606. // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
  2607. LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
  2608. MachineMemOperand *NewMMO =
  2609. MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
  2610. Register LoadReg = DstReg;
  2611. LLT LoadTy = DstTy;
  2612. // If this wasn't already an extending load, we need to widen the result
  2613. // register to avoid creating a load with a narrower result than the source.
  2614. if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
  2615. LoadTy = WideMemTy;
  2616. LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
  2617. }
  2618. if (isa<GSExtLoad>(LoadMI)) {
  2619. auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
  2620. MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
  2621. } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) {
  2622. auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
  2623. // The extra bits are guaranteed to be zero, since we stored them that
  2624. // way. A zext load from Wide thus automatically gives zext from MemVT.
  2625. MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
  2626. } else {
  2627. MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
  2628. }
  2629. if (DstTy != LoadTy)
  2630. MIRBuilder.buildTrunc(DstReg, LoadReg);
  2631. LoadMI.eraseFromParent();
  2632. return Legalized;
  2633. }
  2634. // Big endian lowering not implemented.
  2635. if (MIRBuilder.getDataLayout().isBigEndian())
  2636. return UnableToLegalize;
  2637. // This load needs splitting into power of 2 sized loads.
  2638. //
  2639. // Our strategy here is to generate anyextending loads for the smaller
  2640. // types up to next power-2 result type, and then combine the two larger
  2641. // result values together, before truncating back down to the non-pow-2
  2642. // type.
  2643. // E.g. v1 = i24 load =>
  2644. // v2 = i32 zextload (2 byte)
  2645. // v3 = i32 load (1 byte)
  2646. // v4 = i32 shl v3, 16
  2647. // v5 = i32 or v4, v2
  2648. // v1 = i24 trunc v5
  2649. // By doing this we generate the correct truncate which should get
  2650. // combined away as an artifact with a matching extend.
  2651. uint64_t LargeSplitSize, SmallSplitSize;
  2652. if (!isPowerOf2_32(MemSizeInBits)) {
  2653. // This load needs splitting into power of 2 sized loads.
  2654. LargeSplitSize = PowerOf2Floor(MemSizeInBits);
  2655. SmallSplitSize = MemSizeInBits - LargeSplitSize;
  2656. } else {
  2657. // This is already a power of 2, but we still need to split this in half.
  2658. //
  2659. // Assume we're being asked to decompose an unaligned load.
  2660. // TODO: If this requires multiple splits, handle them all at once.
  2661. auto &Ctx = MF.getFunction().getContext();
  2662. if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
  2663. return UnableToLegalize;
  2664. SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  2665. }
  2666. if (MemTy.isVector()) {
  2667. // TODO: Handle vector extloads
  2668. if (MemTy != DstTy)
  2669. return UnableToLegalize;
  2670. // TODO: We can do better than scalarizing the vector and at least split it
  2671. // in half.
  2672. return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
  2673. }
  2674. MachineMemOperand *LargeMMO =
  2675. MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  2676. MachineMemOperand *SmallMMO =
  2677. MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  2678. LLT PtrTy = MRI.getType(PtrReg);
  2679. unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
  2680. LLT AnyExtTy = LLT::scalar(AnyExtSize);
  2681. auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
  2682. PtrReg, *LargeMMO);
  2683. auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
  2684. LargeSplitSize / 8);
  2685. Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  2686. auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
  2687. auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
  2688. SmallPtr, *SmallMMO);
  2689. auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
  2690. auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
  2691. if (AnyExtTy == DstTy)
  2692. MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
  2693. else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
  2694. auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
  2695. MIRBuilder.buildTrunc(DstReg, {Or});
  2696. } else {
  2697. assert(DstTy.isPointer() && "expected pointer");
  2698. auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
// FIXME: We currently consider this to be illegal for non-integral address
// spaces, but we still need a way to reinterpret the bits.
  2701. MIRBuilder.buildIntToPtr(DstReg, Or);
  2702. }
  2703. LoadMI.eraseFromParent();
  2704. return Legalized;
  2705. }
  2706. LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
  2707. // Lower a non-power of 2 store into multiple pow-2 stores.
  2708. // E.g. split an i24 store into an i16 store + i8 store.
  2709. // We do this by first extending the stored value to the next largest power
  2710. // of 2 type, and then using truncating stores to store the components.
// As with G_LOAD, this generates an extend that can be artifact-combined
// away instead of leaving behind extracts.
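// A little-endian sketch of the i24 case named above, in generic MIR:
//   %ext:_(s32) = G_ANYEXT %val:_(s24)
//   G_STORE %ext, %ptr               ; truncating s16 store of the low half
//   %hi:_(s32) = G_LSHR %ext, 16
//   %ptr2 = G_PTR_ADD %ptr, 2
//   G_STORE %hi, %ptr2               ; truncating s8 store of the high byte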
  2713. Register SrcReg = StoreMI.getValueReg();
  2714. Register PtrReg = StoreMI.getPointerReg();
  2715. LLT SrcTy = MRI.getType(SrcReg);
  2716. MachineFunction &MF = MIRBuilder.getMF();
  2717. MachineMemOperand &MMO = **StoreMI.memoperands_begin();
  2718. LLT MemTy = MMO.getMemoryType();
  2719. unsigned StoreWidth = MemTy.getSizeInBits();
  2720. unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
  2721. if (StoreWidth != StoreSizeInBits) {
  2722. if (SrcTy.isVector())
  2723. return UnableToLegalize;
  2724. // Promote to a byte-sized store with upper bits zero if not
  2725. // storing an integral number of bytes. For example, promote
  2726. // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
  2727. LLT WideTy = LLT::scalar(StoreSizeInBits);
  2728. if (StoreSizeInBits > SrcTy.getSizeInBits()) {
  2729. // Avoid creating a store with a narrower source than result.
  2730. SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
  2731. SrcTy = WideTy;
  2732. }
  2733. auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
  2734. MachineMemOperand *NewMMO =
  2735. MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
  2736. MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
  2737. StoreMI.eraseFromParent();
  2738. return Legalized;
  2739. }
  2740. if (MemTy.isVector()) {
  2741. // TODO: Handle vector trunc stores
  2742. if (MemTy != SrcTy)
  2743. return UnableToLegalize;
  2744. // TODO: We can do better than scalarizing the vector and at least split it
  2745. // in half.
  2746. return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
  2747. }
  2748. unsigned MemSizeInBits = MemTy.getSizeInBits();
  2749. uint64_t LargeSplitSize, SmallSplitSize;
  2750. if (!isPowerOf2_32(MemSizeInBits)) {
  2751. LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
  2752. SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
  2753. } else {
  2754. auto &Ctx = MF.getFunction().getContext();
  2755. if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
  2756. return UnableToLegalize; // Don't know what we're being asked to do.
  2757. SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  2758. }
  2759. // Extend to the next pow-2. If this store was itself the result of lowering,
  2760. // e.g. an s56 store being broken into s32 + s24, we might have a stored type
  2761. // that's wider than the stored size.
  2762. unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
  2763. const LLT NewSrcTy = LLT::scalar(AnyExtSize);
  2764. if (SrcTy.isPointer()) {
  2765. const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
  2766. SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
  2767. }
  2768. auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
  2769. // Obtain the smaller value by shifting away the larger value.
  2770. auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
  2771. auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
  2772. // Generate the PtrAdd and truncating stores.
  2773. LLT PtrTy = MRI.getType(PtrReg);
  2774. auto OffsetCst = MIRBuilder.buildConstant(
  2775. LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
  2776. auto SmallPtr =
  2777. MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
  2778. MachineMemOperand *LargeMMO =
  2779. MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  2780. MachineMemOperand *SmallMMO =
  2781. MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  2782. MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
  2783. MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
  2784. StoreMI.eraseFromParent();
  2785. return Legalized;
  2786. }
  2787. LegalizerHelper::LegalizeResult
  2788. LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  2789. switch (MI.getOpcode()) {
  2790. case TargetOpcode::G_LOAD: {
  2791. if (TypeIdx != 0)
  2792. return UnableToLegalize;
  2793. MachineMemOperand &MMO = **MI.memoperands_begin();
  2794. // Not sure how to interpret a bitcast of an extending load.
  2795. if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
  2796. return UnableToLegalize;
  2797. Observer.changingInstr(MI);
  2798. bitcastDst(MI, CastTy, 0);
  2799. MMO.setType(CastTy);
  2800. Observer.changedInstr(MI);
  2801. return Legalized;
  2802. }
  2803. case TargetOpcode::G_STORE: {
  2804. if (TypeIdx != 0)
  2805. return UnableToLegalize;
  2806. MachineMemOperand &MMO = **MI.memoperands_begin();
  2807. // Not sure how to interpret a bitcast of a truncating store.
  2808. if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
  2809. return UnableToLegalize;
  2810. Observer.changingInstr(MI);
  2811. bitcastSrc(MI, CastTy, 0);
  2812. MMO.setType(CastTy);
  2813. Observer.changedInstr(MI);
  2814. return Legalized;
  2815. }
  2816. case TargetOpcode::G_SELECT: {
  2817. if (TypeIdx != 0)
  2818. return UnableToLegalize;
  2819. if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
  2820. LLVM_DEBUG(
  2821. dbgs() << "bitcast action not implemented for vector select\n");
  2822. return UnableToLegalize;
  2823. }
  2824. Observer.changingInstr(MI);
  2825. bitcastSrc(MI, CastTy, 2);
  2826. bitcastSrc(MI, CastTy, 3);
  2827. bitcastDst(MI, CastTy, 0);
  2828. Observer.changedInstr(MI);
  2829. return Legalized;
  2830. }
  2831. case TargetOpcode::G_AND:
  2832. case TargetOpcode::G_OR:
  2833. case TargetOpcode::G_XOR: {
  2834. Observer.changingInstr(MI);
  2835. bitcastSrc(MI, CastTy, 1);
  2836. bitcastSrc(MI, CastTy, 2);
  2837. bitcastDst(MI, CastTy, 0);
  2838. Observer.changedInstr(MI);
  2839. return Legalized;
  2840. }
  2841. case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  2842. return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  2843. case TargetOpcode::G_INSERT_VECTOR_ELT:
  2844. return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
  2845. default:
  2846. return UnableToLegalize;
  2847. }
  2848. }
  2849. // Legalize an instruction by changing the opcode in place.
  2850. void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  2851. Observer.changingInstr(MI);
  2852. MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
  2853. Observer.changedInstr(MI);
  2854. }
  2855. LegalizerHelper::LegalizeResult
  2856. LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
  2857. using namespace TargetOpcode;
  2858. switch(MI.getOpcode()) {
  2859. default:
  2860. return UnableToLegalize;
  2861. case TargetOpcode::G_BITCAST:
  2862. return lowerBitcast(MI);
  2863. case TargetOpcode::G_SREM:
  2864. case TargetOpcode::G_UREM: {
  2865. LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  2866. auto Quot =
  2867. MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
  2868. {MI.getOperand(1), MI.getOperand(2)});
  2869. auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
  2870. MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
  2871. MI.eraseFromParent();
  2872. return Legalized;
  2873. }
  2874. case TargetOpcode::G_SADDO:
  2875. case TargetOpcode::G_SSUBO:
  2876. return lowerSADDO_SSUBO(MI);
  2877. case TargetOpcode::G_UMULH:
  2878. case TargetOpcode::G_SMULH:
  2879. return lowerSMULH_UMULH(MI);
  2880. case TargetOpcode::G_SMULO:
  2881. case TargetOpcode::G_UMULO: {
  2882. // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
  2883. // result.
  2884. Register Res = MI.getOperand(0).getReg();
  2885. Register Overflow = MI.getOperand(1).getReg();
  2886. Register LHS = MI.getOperand(2).getReg();
  2887. Register RHS = MI.getOperand(3).getReg();
  2888. LLT Ty = MRI.getType(Res);
  2889. unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
  2890. ? TargetOpcode::G_SMULH
  2891. : TargetOpcode::G_UMULH;
  2892. Observer.changingInstr(MI);
  2893. const auto &TII = MIRBuilder.getTII();
  2894. MI.setDesc(TII.get(TargetOpcode::G_MUL));
  2895. MI.RemoveOperand(1);
  2896. Observer.changedInstr(MI);
  2897. auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
  2898. auto Zero = MIRBuilder.buildConstant(Ty, 0);
  2899. // Move insert point forward so we can use the Res register if needed.
  2900. MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  2901. // For *signed* multiply, overflow is detected by checking:
  2902. // (hi != (lo >> bitwidth-1))
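// Worked example (a sketch, s16 G_UMULO): 300 * 300 = 90000 = 0x15F90, so
// the high half is 0x0001 != 0 and overflow is set. For G_SMULO, the high
// half must instead equal the sign-replication of the low half (all zeros
// or all ones) for the result to fit.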
  2903. if (Opcode == TargetOpcode::G_SMULH) {
  2904. auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
  2905. auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
  2906. MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
  2907. } else {
  2908. MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
  2909. }
  2910. return Legalized;
  2911. }
  2912. case TargetOpcode::G_FNEG: {
  2913. Register Res = MI.getOperand(0).getReg();
  2914. LLT Ty = MRI.getType(Res);
  2915. // TODO: Handle vector types once we are able to
  2916. // represent them.
  2917. if (Ty.isVector())
  2918. return UnableToLegalize;
  2919. auto SignMask =
  2920. MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
  2921. Register SubByReg = MI.getOperand(1).getReg();
  2922. MIRBuilder.buildXor(Res, SubByReg, SignMask);
  2923. MI.eraseFromParent();
  2924. return Legalized;
  2925. }
  2926. case TargetOpcode::G_FSUB: {
  2927. Register Res = MI.getOperand(0).getReg();
  2928. LLT Ty = MRI.getType(Res);
  2929. // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
  2930. // First, check if G_FNEG is marked as Lower. If so, we may
  2931. // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
  2932. if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
  2933. return UnableToLegalize;
  2934. Register LHS = MI.getOperand(1).getReg();
  2935. Register RHS = MI.getOperand(2).getReg();
  2936. Register Neg = MRI.createGenericVirtualRegister(Ty);
  2937. MIRBuilder.buildFNeg(Neg, RHS);
  2938. MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
  2939. MI.eraseFromParent();
  2940. return Legalized;
  2941. }
  2942. case TargetOpcode::G_FMAD:
  2943. return lowerFMad(MI);
  2944. case TargetOpcode::G_FFLOOR:
  2945. return lowerFFloor(MI);
  2946. case TargetOpcode::G_INTRINSIC_ROUND:
  2947. return lowerIntrinsicRound(MI);
  2948. case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
  2949. // Since round even is the assumed rounding mode for unconstrained FP
  2950. // operations, rint and roundeven are the same operation.
  2951. changeOpcode(MI, TargetOpcode::G_FRINT);
  2952. return Legalized;
  2953. }
  2954. case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
  2955. Register OldValRes = MI.getOperand(0).getReg();
  2956. Register SuccessRes = MI.getOperand(1).getReg();
  2957. Register Addr = MI.getOperand(2).getReg();
  2958. Register CmpVal = MI.getOperand(3).getReg();
  2959. Register NewVal = MI.getOperand(4).getReg();
  2960. MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
  2961. **MI.memoperands_begin());
  2962. MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
  2963. MI.eraseFromParent();
  2964. return Legalized;
  2965. }
  2966. case TargetOpcode::G_LOAD:
  2967. case TargetOpcode::G_SEXTLOAD:
  2968. case TargetOpcode::G_ZEXTLOAD:
  2969. return lowerLoad(cast<GAnyLoad>(MI));
  2970. case TargetOpcode::G_STORE:
  2971. return lowerStore(cast<GStore>(MI));
  2972. case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  2973. case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  2974. case TargetOpcode::G_CTLZ:
  2975. case TargetOpcode::G_CTTZ:
  2976. case TargetOpcode::G_CTPOP:
  2977. return lowerBitCount(MI);
  2978. case G_UADDO: {
  2979. Register Res = MI.getOperand(0).getReg();
  2980. Register CarryOut = MI.getOperand(1).getReg();
  2981. Register LHS = MI.getOperand(2).getReg();
  2982. Register RHS = MI.getOperand(3).getReg();
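// Unsigned addition overflowed iff the result wrapped below an operand,
// e.g. (a sketch, s8): 200 + 100 = 44 (mod 256), and 44 <u 100.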
  2983. MIRBuilder.buildAdd(Res, LHS, RHS);
  2984. MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
  2985. MI.eraseFromParent();
  2986. return Legalized;
  2987. }
  2988. case G_UADDE: {
  2989. Register Res = MI.getOperand(0).getReg();
  2990. Register CarryOut = MI.getOperand(1).getReg();
  2991. Register LHS = MI.getOperand(2).getReg();
  2992. Register RHS = MI.getOperand(3).getReg();
  2993. Register CarryIn = MI.getOperand(4).getReg();
  2994. LLT Ty = MRI.getType(Res);
  2995. auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
  2996. auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
  2997. MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
  2998. MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
  2999. MI.eraseFromParent();
  3000. return Legalized;
  3001. }
  3002. case G_USUBO: {
  3003. Register Res = MI.getOperand(0).getReg();
  3004. Register BorrowOut = MI.getOperand(1).getReg();
  3005. Register LHS = MI.getOperand(2).getReg();
  3006. Register RHS = MI.getOperand(3).getReg();
  3007. MIRBuilder.buildSub(Res, LHS, RHS);
  3008. MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
  3009. MI.eraseFromParent();
  3010. return Legalized;
  3011. }
  3012. case G_USUBE: {
  3013. Register Res = MI.getOperand(0).getReg();
  3014. Register BorrowOut = MI.getOperand(1).getReg();
  3015. Register LHS = MI.getOperand(2).getReg();
  3016. Register RHS = MI.getOperand(3).getReg();
  3017. Register BorrowIn = MI.getOperand(4).getReg();
  3018. const LLT CondTy = MRI.getType(BorrowOut);
  3019. const LLT Ty = MRI.getType(Res);
  3020. auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
  3021. auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
  3022. MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
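// Borrow-out logic: when LHS == RHS, the subtraction borrows exactly when
// BorrowIn was set; otherwise it borrows exactly when LHS <u RHS.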
  3023. auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
  3024. auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
  3025. MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
  3026. MI.eraseFromParent();
  3027. return Legalized;
  3028. }
  3029. case G_UITOFP:
  3030. return lowerUITOFP(MI);
  3031. case G_SITOFP:
  3032. return lowerSITOFP(MI);
  3033. case G_FPTOUI:
  3034. return lowerFPTOUI(MI);
  3035. case G_FPTOSI:
  3036. return lowerFPTOSI(MI);
  3037. case G_FPTRUNC:
  3038. return lowerFPTRUNC(MI);
  3039. case G_FPOWI:
  3040. return lowerFPOWI(MI);
  3041. case G_SMIN:
  3042. case G_SMAX:
  3043. case G_UMIN:
  3044. case G_UMAX:
  3045. return lowerMinMax(MI);
  3046. case G_FCOPYSIGN:
  3047. return lowerFCopySign(MI);
  3048. case G_FMINNUM:
  3049. case G_FMAXNUM:
  3050. return lowerFMinNumMaxNum(MI);
  3051. case G_MERGE_VALUES:
  3052. return lowerMergeValues(MI);
  3053. case G_UNMERGE_VALUES:
  3054. return lowerUnmergeValues(MI);
  3055. case TargetOpcode::G_SEXT_INREG: {
  3056. assert(MI.getOperand(2).isImm() && "Expected immediate");
  3057. int64_t SizeInBits = MI.getOperand(2).getImm();
  3058. Register DstReg = MI.getOperand(0).getReg();
  3059. Register SrcReg = MI.getOperand(1).getReg();
  3060. LLT DstTy = MRI.getType(DstReg);
  3061. Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
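// Shift the desired sign bit up to the MSB, then arithmetic-shift it back
// down, e.g. (a sketch) sign-extending from 8 bits within s32:
//   %tmp = G_SHL %src, 24
//   %dst = G_ASHR %tmp, 24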
  3062. auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
  3063. MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
  3064. MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
  3065. MI.eraseFromParent();
  3066. return Legalized;
  3067. }
  3068. case G_EXTRACT_VECTOR_ELT:
  3069. case G_INSERT_VECTOR_ELT:
  3070. return lowerExtractInsertVectorElt(MI);
  3071. case G_SHUFFLE_VECTOR:
  3072. return lowerShuffleVector(MI);
  3073. case G_DYN_STACKALLOC:
  3074. return lowerDynStackAlloc(MI);
  3075. case G_EXTRACT:
  3076. return lowerExtract(MI);
  3077. case G_INSERT:
  3078. return lowerInsert(MI);
  3079. case G_BSWAP:
  3080. return lowerBswap(MI);
  3081. case G_BITREVERSE:
  3082. return lowerBitreverse(MI);
  3083. case G_READ_REGISTER:
  3084. case G_WRITE_REGISTER:
  3085. return lowerReadWriteRegister(MI);
  3086. case G_UADDSAT:
  3087. case G_USUBSAT: {
  3088. // Try to make a reasonable guess about which lowering strategy to use. The
  3089. // target can override this with custom lowering and calling the
  3090. // implementation functions.
  3091. LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  3092. if (LI.isLegalOrCustom({G_UMIN, Ty}))
  3093. return lowerAddSubSatToMinMax(MI);
  3094. return lowerAddSubSatToAddoSubo(MI);
  3095. }
  3096. case G_SADDSAT:
  3097. case G_SSUBSAT: {
  3098. LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  3099. // FIXME: It would probably make more sense to see if G_SADDO is preferred,
  3100. // since it's a shorter expansion. However, we would need to figure out the
  3101. // preferred boolean type for the carry out for the query.
  3102. if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
  3103. return lowerAddSubSatToMinMax(MI);
  3104. return lowerAddSubSatToAddoSubo(MI);
  3105. }
  3106. case G_SSHLSAT:
  3107. case G_USHLSAT:
  3108. return lowerShlSat(MI);
  3109. case G_ABS:
  3110. return lowerAbsToAddXor(MI);
  3111. case G_SELECT:
  3112. return lowerSelect(MI);
  3113. case G_SDIVREM:
  3114. case G_UDIVREM:
  3115. return lowerDIVREM(MI);
  3116. case G_FSHL:
  3117. case G_FSHR:
  3118. return lowerFunnelShift(MI);
  3119. case G_ROTL:
  3120. case G_ROTR:
  3121. return lowerRotate(MI);
  3122. case G_MEMSET:
  3123. case G_MEMCPY:
  3124. case G_MEMMOVE:
  3125. return lowerMemCpyFamily(MI);
  3126. case G_MEMCPY_INLINE:
  3127. return lowerMemcpyInline(MI);
  3128. GISEL_VECREDUCE_CASES_NONSEQ
  3129. return lowerVectorReduction(MI);
  3130. }
  3131. }
  3132. Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
  3133. Align MinAlign) const {
  3134. // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
  3135. // datalayout for the preferred alignment. Also there should be a target hook
  3136. // for this to allow targets to reduce the alignment and ignore the
  3137. // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
  3138. // the type.
  3139. return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
  3140. }
  3141. MachineInstrBuilder
  3142. LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
  3143. MachinePointerInfo &PtrInfo) {
  3144. MachineFunction &MF = MIRBuilder.getMF();
  3145. const DataLayout &DL = MIRBuilder.getDataLayout();
  3146. int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
  3147. unsigned AddrSpace = DL.getAllocaAddrSpace();
  3148. LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
  3149. PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  3150. return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
  3151. }
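/// Clamp a dynamic vector index so the access stays in bounds. For a
/// power-of-2 element count this is a cheap mask (a sketch: a 4-element
/// vector uses %idx & 3); otherwise it falls back to umin(%idx, NElts - 1).
/// Constant indices are returned unchanged.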
  3152. static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
  3153. LLT VecTy) {
  3154. int64_t IdxVal;
  3155. if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
  3156. return IdxReg;
  3157. LLT IdxTy = B.getMRI()->getType(IdxReg);
  3158. unsigned NElts = VecTy.getNumElements();
  3159. if (isPowerOf2_32(NElts)) {
  3160. APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
  3161. return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
  3162. }
  3163. return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
  3164. .getReg(0);
  3165. }
  3166. Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
  3167. Register Index) {
  3168. LLT EltTy = VecTy.getElementType();
  3169. // Calculate the element offset and add it to the pointer.
  3170. unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
  3171. assert(EltSize * 8 == EltTy.getSizeInBits() &&
  3172. "Converting bits to bytes lost precision");
  3173. Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
  3174. LLT IdxTy = MRI.getType(Index);
  3175. auto Mul = MIRBuilder.buildMul(IdxTy, Index,
  3176. MIRBuilder.buildConstant(IdxTy, EltSize));
  3177. LLT PtrTy = MRI.getType(VecPtr);
  3178. return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
  3179. }
  3180. #ifndef NDEBUG
/// Check that all vector operands have the same number of elements. Other
/// operands should be listed in \p NonVecOpIndices.
  3183. static bool hasSameNumEltsOnAllVectorOperands(
  3184. GenericMachineInstr &MI, MachineRegisterInfo &MRI,
  3185. std::initializer_list<unsigned> NonVecOpIndices) {
  3186. if (MI.getNumMemOperands() != 0)
  3187. return false;
  3188. LLT VecTy = MRI.getType(MI.getReg(0));
  3189. if (!VecTy.isVector())
  3190. return false;
  3191. unsigned NumElts = VecTy.getNumElements();
  3192. for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
  3193. MachineOperand &Op = MI.getOperand(OpIdx);
  3194. if (!Op.isReg()) {
  3195. if (!is_contained(NonVecOpIndices, OpIdx))
  3196. return false;
  3197. continue;
  3198. }
  3199. LLT Ty = MRI.getType(Op.getReg());
  3200. if (!Ty.isVector()) {
  3201. if (!is_contained(NonVecOpIndices, OpIdx))
  3202. return false;
  3203. continue;
  3204. }
  3205. if (Ty.getNumElements() != NumElts)
  3206. return false;
  3207. }
  3208. return true;
  3209. }
  3210. #endif
/// Fill \p DstOps with DstOps that cover \p Ty when combined: either scalar
/// DstOps when \p NumElts = 1, or vectors with \p NumElts elements. When
/// Ty.getNumElements() is not a multiple of \p NumElts, the last DstOp
/// (leftover) has fewer than \p NumElts elements.
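/// For example, splitting <7 x s32> with \p NumElts = 2 fills \p DstOps with
/// three <2 x s32> pieces followed by a single s32 leftover.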
static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
                       unsigned NumElts) {
  LLT LeftoverTy;
  assert(Ty.isVector() && "Expected vector type");
  LLT EltTy = Ty.getElementType();
  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover) =
      getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);

  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
  for (int i = 0; i < NumParts; ++i) {
    DstOps.push_back(NarrowTy);
  }

  if (LeftoverTy.isValid()) {
    assert(NumLeftover == 1 && "expected exactly one leftover");
    DstOps.push_back(LeftoverTy);
  }
}
/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
/// made from \p Op depending on operand type.
static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
                           MachineOperand &Op) {
  for (unsigned i = 0; i < N; ++i) {
    if (Op.isReg())
      Ops.push_back(Op.getReg());
    else if (Op.isImm())
      Ops.push_back(Op.getImm());
    else if (Op.isPredicate())
      Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
    else
      llvm_unreachable("Unsupported type");
  }
}
// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different
// element type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
//   <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//             s64 = G_SHL s64, s32
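//
// Non-vector operands are broadcast to every sub-instruction instead of being
// split, e.g. with NumElts = 2:
//   <4 x s1> = G_ICMP intpred(eq), <4 x s32>, <4 x s32> ->
//        <2 x s1> = G_ICMP intpred(eq), <2 x s32>, <2 x s32>
//        <2 x s1> = G_ICMP intpred(eq), <2 x s32>, <2 x s32>
// where the predicate (operand 1, listed in NonVecOpIndices) is reused as is.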
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
    GenericMachineInstr &MI, unsigned NumElts,
    std::initializer_list<unsigned> NonVecOpIndices) {
  assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
         "Non-compatible opcode or not specified non-vector operands");
  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();

  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
  // Build instructions with DstOps to use instruction found by CSE directly.
  // CSE copies found instruction into given vreg when building with vreg dest.
  SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
  // Output registers will be taken from created instructions.
  SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
  for (unsigned i = 0; i < NumDefs; ++i) {
    makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
  }

  // Split vector input operands into sub-vectors with NumElts elts + Leftover.
  // Operands listed in NonVecOpIndices will be used as is without splitting;
  // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
  // scalar condition (op 1), immediate in sext_inreg (op 2).
  SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       ++UseIdx, ++UseNo) {
    if (is_contained(NonVecOpIndices, UseIdx)) {
      broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
                     MI.getOperand(UseIdx));
    } else {
      SmallVector<Register, 8> SplitPieces;
      extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
      for (auto Reg : SplitPieces)
        InputOpsPieces[UseNo].push_back(Reg);
    }
  }

  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;

  // Take i-th piece of each input operand split and build sub-vector/scalar
  // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    SmallVector<DstOp, 2> Defs;
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      Defs.push_back(OutputOpsPieces[DstNo][i]);

    SmallVector<SrcOp, 3> Uses;
    for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
      Uses.push_back(InputOpsPieces[InputNo][i]);

    auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      OutputRegs[DstNo].push_back(I.getReg(DstNo));
  }

  // Merge small outputs into MI's output for each def operand.
  if (NumLeftovers) {
    for (unsigned i = 0; i < NumDefs; ++i)
      mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
  } else {
    for (unsigned i = 0; i < NumDefs; ++i)
      MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]);
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
                                        unsigned NumElts) {
  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();

  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  SmallVector<DstOp, 8> OutputOpsPieces;
  SmallVector<Register, 8> OutputRegs;
  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);

  // Instructions that perform register split will be inserted in the basic
  // block where the register is defined (the basic block is in the next
  // operand).
  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {
    MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
  }

  // Build PHIs with fewer elements.
  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
  MIRBuilder.setInsertPt(*MI.getParent(), MI);
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
    Phi.addDef(
        MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
    OutputRegs.push_back(Phi.getReg(0));

    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      Phi.add(MI.getOperand(1 + j * 2 + 1));
    }
  }

  // Merge small outputs into MI's def.
  if (NumLeftovers) {
    mergeMixedSubvectors(MI.getReg(0), OutputRegs);
  } else {
    MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  LLT SrcTy = MRI.getType(SrcReg);

  if (TypeIdx != 1 || NarrowTy == DstTy)
    return UnableToLegalize;

  // Requires compatible types. Otherwise, SrcReg should have been defined by a
  // merge-like instruction that would get artifact-combined. Most likely the
  // instruction that defines SrcReg has to perform more/fewer-elements
  // legalization compatible with NarrowTy.
  assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");

  if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
      (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
    return UnableToLegalize;

  // This is most likely DstTy (smaller than register size) packed in SrcTy
  // (larger than register size) and since unmerge was not combined it will be
  // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
  // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.

  // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
  //
  // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
  // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
  // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
  // Requires compatible types. Otherwise, the user of DstReg did not perform
  // an unmerge that should have been artifact-combined. Most likely the
  // instruction that uses DstReg has to do more/fewer-elements legalization
  // compatible with NarrowTy.
  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
  assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");

  if (NarrowTy == SrcTy)
    return UnableToLegalize;

  // This attempts to lower part of an LCMTy merge/unmerge sequence; it is
  // intended for old MIR tests. Since the more/fewer-elements changes, it
  // should no longer be possible to generate MIR like this when starting from
  // LLVM IR, because the LCMTy approach was replaced with merges/unmerges to
  // vector elements.
  if (TypeIdx == 1) {
    assert(SrcTy.isVector() && "Expected vector types");
    assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
    if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
        (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
      return UnableToLegalize;
    // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
    //
    // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
    // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
    // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
    // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
    // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
    // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11

    SmallVector<Register, 8> Elts;
    LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
    for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
      auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
      for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
        Elts.push_back(Unmerge.getReg(j));
    }

    SmallVector<Register, 8> NarrowTyElts;
    unsigned NumNarrowTyElts = NarrowTy.getNumElements();
    unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
    for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
         ++i, Offset += NumNarrowTyElts) {
      ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
      NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
    }

    MIRBuilder.buildMerge(DstReg, NarrowTyElts);
    MI.eraseFromParent();
    return Legalized;
  }

  assert(TypeIdx == 0 && "Bad type index");
  if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
      (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
    return UnableToLegalize;

  // This is most likely SrcTy (smaller than register size) packed in DstTy
  // (larger than register size) and since merge was not combined it will be
  // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
  // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.

  // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
  //
  // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
  // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
  // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
  SmallVector<Register, 8> NarrowTyElts;
  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
  unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
  unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
  for (unsigned i = 0; i < NumParts; ++i) {
    SmallVector<Register, 8> Sources;
    for (unsigned j = 0; j < NumElts; ++j)
      Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
    NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0));
  }

  MIRBuilder.buildMerge(DstReg, NarrowTyElts);
  MI.eraseFromParent();
  return Legalized;
}
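
// For a constant index, G_EXTRACT_VECTOR_ELT/G_INSERT_VECTOR_ELT can be
// narrowed by operating on just one of the NarrowVecTy-sized pieces. For
// example, element 5 of <8 x s32> with NarrowVecTy = <4 x s32> lives in piece
// 5 / 4 = 1 at sub-index 5 % 4 = 1.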
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
  if (MaybeCst) {
    IdxVal = MaybeCst->Value.getSExtValue();
    // Avoid out of bounds indexing the pieces.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type,
  // so we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractInsertVectorElt(MI);
}
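
// Break a wide load or store into NarrowTy-sized memory accesses at increasing
// byte offsets. For example, narrowing an s96 load with NarrowTy = s32
// produces three s32 loads at byte offsets 0, 4 and 8 whose results are then
// re-merged into the original s96 value.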
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (LdStMI.isAtomic())
    return UnableToLegalize;

  bool IsLoad = isa<GLoad>(LdStMI);
  Register ValReg = LdStMI.getReg(0);
  Register AddrReg = LdStMI.getPointerReg();
  LLT ValTy = MRI.getType(ValReg);

  // FIXME: Do we need a distinct NarrowMemory legalize action?
  if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) =
        getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  LLT PtrTy = MRI.getType(AddrReg);
  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each element
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto MMO = LdStMI.getMMO();
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      MachineMemOperand *NewMMO =
          MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  LdStMI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;
  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
  unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_PTR_ADD:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SDIVREM:
  case G_UDIVREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_ABS:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
  case G_FSHL:
  case G_FSHR:
  case G_ROTL:
  case G_ROTR:
  case G_FREEZE:
  case G_SADDSAT:
  case G_SSUBSAT:
  case G_UADDSAT:
  case G_USUBSAT:
  case G_UMULO:
  case G_SMULO:
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_SSHLSAT:
  case G_USHLSAT:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorMultiEltType(GMI, NumElts);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
  case G_SELECT:
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return fewerElementsVectorMultiEltType(GMI, NumElts);
    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
  case G_PHI:
    return fewerElementsVectorPhi(GMI, NumElts);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    if (TypeIdx != 1) // TODO: This probably does work as expected already.
      return UnableToLegalize;
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
  GISEL_VECREDUCE_CASES_NONSEQ
    return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
  case G_SHUFFLE_VECTOR:
    return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  LLT DstTy = MRI.getType(DstReg);
  LLT Src1Ty = MRI.getType(Src1Reg);
  LLT Src2Ty = MRI.getType(Src2Reg);
  // The shuffle should be canonicalized by now.
  if (DstTy != Src1Ty)
    return UnableToLegalize;
  if (DstTy != Src2Ty)
    return UnableToLegalize;

  if (!isPowerOf2_32(DstTy.getNumElements()))
    return UnableToLegalize;

  // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
  // Further legalization attempts will be needed to split further.
  NarrowTy =
      DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
  unsigned NewElts = NarrowTy.getNumElements();

  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
                        SplitSrc2Regs[1]};
  Register Hi, Lo;

  // If Lo or Hi uses elements from at most two of the four input vectors, then
  // express it as a vector shuffle of those two inputs. Otherwise extract the
  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
  SmallVector<int, 16> Ops;
  for (unsigned High = 0; High < 2; ++High) {
    Register &Output = High ? Hi : Lo;

    // Build a shuffle mask for the output, discovering on the fly which
    // input vectors to use as shuffle operands (recorded in InputUsed).
    // If building a suitable shuffle vector proves too hard, then bail
    // out with UseBuildVector set.
    unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      // The mask element. This indexes into the input.
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      // The input vector this mask element indexes into.
      unsigned Input = (unsigned)Idx / NewElts;

      if (Input >= array_lengthof(Inputs)) {
        // The mask element does not index into any input vector.
        Ops.push_back(-1);
        continue;
      }

      // Turn the index into an offset from the start of the input vector.
      Idx -= Input * NewElts;

      // Find or create a shuffle vector operand to hold this input.
      unsigned OpNo;
      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
          // This input vector is already an operand.
          break;
        } else if (InputUsed[OpNo] == -1U) {
          // Create a new operand for this input vector.
          InputUsed[OpNo] = Input;
          break;
        }
      }

      if (OpNo >= array_lengthof(InputUsed)) {
        // More than two input vectors used! Give up on trying to create a
        // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
        UseBuildVector = true;
        break;
      }

      // Add the mask index for the new shuffle vector.
      Ops.push_back(Idx + OpNo * NewElts);
    }

    if (UseBuildVector) {
      LLT EltTy = NarrowTy.getElementType();
      SmallVector<Register, 16> SVOps;

      // Extract the input elements by hand.
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        // The mask element. This indexes into the input.
        int Idx = Mask[FirstMaskIdx + MaskOffset];

        // The input vector this mask element indexes into.
        unsigned Input = (unsigned)Idx / NewElts;

        if (Input >= array_lengthof(Inputs)) {
          // The mask element is "undef" or indexes off the end of the input.
          SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
          continue;
        }

        // Turn the index into an offset from the start of the input vector.
        Idx -= Input * NewElts;

        // Extract the vector element by hand.
        SVOps.push_back(MIRBuilder
                            .buildExtractVectorElement(
                                EltTy, Inputs[Input],
                                MIRBuilder.buildConstant(LLT::scalar(32), Idx))
                            .getReg(0));
      }

      // Construct the Lo/Hi output using a G_BUILD_VECTOR.
      Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
    } else if (InputUsed[0] == -1U) {
      // No input vectors were used! The result is undefined.
      Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
    } else {
      Register Op0 = Inputs[InputUsed[0]];
      // If only one input was used, use an undefined vector for the other.
      Register Op1 = InputUsed[1] == -1U
                         ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
                         : Inputs[InputUsed[1]];
      // At least one input vector was used. Create a new shuffle vector.
      Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
    }

    Ops.clear();
  }

  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
  MI.eraseFromParent();
  return Legalized;
}
static unsigned getScalarOpcForReduction(unsigned Opc) {
  unsigned ScalarOpc;
  switch (Opc) {
  case TargetOpcode::G_VECREDUCE_FADD:
    ScalarOpc = TargetOpcode::G_FADD;
    break;
  case TargetOpcode::G_VECREDUCE_FMUL:
    ScalarOpc = TargetOpcode::G_FMUL;
    break;
  case TargetOpcode::G_VECREDUCE_FMAX:
    ScalarOpc = TargetOpcode::G_FMAXNUM;
    break;
  case TargetOpcode::G_VECREDUCE_FMIN:
    ScalarOpc = TargetOpcode::G_FMINNUM;
    break;
  case TargetOpcode::G_VECREDUCE_ADD:
    ScalarOpc = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_VECREDUCE_MUL:
    ScalarOpc = TargetOpcode::G_MUL;
    break;
  case TargetOpcode::G_VECREDUCE_AND:
    ScalarOpc = TargetOpcode::G_AND;
    break;
  case TargetOpcode::G_VECREDUCE_OR:
    ScalarOpc = TargetOpcode::G_OR;
    break;
  case TargetOpcode::G_VECREDUCE_XOR:
    ScalarOpc = TargetOpcode::G_XOR;
    break;
  case TargetOpcode::G_VECREDUCE_SMAX:
    ScalarOpc = TargetOpcode::G_SMAX;
    break;
  case TargetOpcode::G_VECREDUCE_SMIN:
    ScalarOpc = TargetOpcode::G_SMIN;
    break;
  case TargetOpcode::G_VECREDUCE_UMAX:
    ScalarOpc = TargetOpcode::G_UMAX;
    break;
  case TargetOpcode::G_VECREDUCE_UMIN:
    ScalarOpc = TargetOpcode::G_UMIN;
    break;
  default:
    llvm_unreachable("Unhandled reduction");
  }
  return ScalarOpc;
}
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  unsigned Opc = MI.getOpcode();
  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
         Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
         "Sequential reductions not expected");

  if (TypeIdx != 1)
    return UnableToLegalize;

  // The semantics of the normal non-sequential reductions allow us to freely
  // re-associate the operation.
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  if (NarrowTy.isVector() &&
      (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
    return UnableToLegalize;

  unsigned ScalarOpc = getScalarOpcForReduction(Opc);
  SmallVector<Register> SplitSrcs;
  // If NarrowTy is a scalar then we're being asked to scalarize.
  const unsigned NumParts =
      NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
                          : SrcTy.getNumElements();

  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
  if (NarrowTy.isScalar()) {
    if (DstTy != NarrowTy)
      return UnableToLegalize; // FIXME: handle implicit extensions.

    if (isPowerOf2_32(NumParts)) {
      // Generate a tree of scalar operations to reduce the critical path.
      SmallVector<Register> PartialResults;
      unsigned NumPartsLeft = NumParts;
      while (NumPartsLeft > 1) {
        for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
          PartialResults.emplace_back(
              MIRBuilder
                  .buildInstr(ScalarOpc, {NarrowTy},
                              {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
                  .getReg(0));
        }
        SplitSrcs = PartialResults;
        PartialResults.clear();
        NumPartsLeft = SplitSrcs.size();
      }
      assert(SplitSrcs.size() == 1);
      MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
      MI.eraseFromParent();
      return Legalized;
    }
    // If we can't generate a tree, then just do sequential operations.
    Register Acc = SplitSrcs[0];
    for (unsigned Idx = 1; Idx < NumParts; ++Idx)
      Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
                .getReg(0);
    MIRBuilder.buildCopy(DstReg, Acc);
    MI.eraseFromParent();
    return Legalized;
  }
  SmallVector<Register> PartialReductions;
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    PartialReductions.push_back(
        MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
  }

  // If the types involved are powers of 2, we can generate intermediate vector
  // ops before generating a final reduction operation.
  if (isPowerOf2_32(SrcTy.getNumElements()) &&
      isPowerOf2_32(NarrowTy.getNumElements())) {
    return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
  }

  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
      MIRBuilder.buildInstr(ScalarOpc, {DstReg},
                            {Acc, PartialReductions[Part]});
    } else {
      Acc = MIRBuilder
                .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
                .getReg(0);
    }
  }
  MI.eraseFromParent();
  return Legalized;
}
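
// Reduce with a tree of intermediate vector ops. For example, a
// G_VECREDUCE_ADD of <8 x s32> with NarrowTy = <2 x s32> becomes
// (illustrative):
//   %a:_(<2 x s32>) = G_ADD %p0, %p1
//   %b:_(<2 x s32>) = G_ADD %p2, %p3
//   %r:_(<2 x s32>) = G_ADD %a, %b
// after which the original reduction is performed on %r.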
LegalizerHelper::LegalizeResult
LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
                                        LLT SrcTy, LLT NarrowTy,
                                        unsigned ScalarOpc) {
  SmallVector<Register> SplitSrcs;
  // Split the sources into NarrowTy size pieces.
  extractParts(SrcReg, NarrowTy,
               SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
  // We're going to do a tree reduction using vector operations until we have
  // one NarrowTy size value left.
  while (SplitSrcs.size() > 1) {
    SmallVector<Register> PartialRdxs;
    for (unsigned Idx = 0; Idx < SplitSrcs.size() - 1; Idx += 2) {
      Register LHS = SplitSrcs[Idx];
      Register RHS = SplitSrcs[Idx + 1];
      // Create the intermediate vector op.
      Register Res =
          MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
      PartialRdxs.push_back(Res);
    }
    SplitSrcs = std::move(PartialRdxs);
  }
  // Finally generate the requested NarrowTy based reduction.
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(SplitSrcs[0]);
  Observer.changedInstr(MI);
  return Legalized;
}
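
// With a constant shift amount, no runtime selects are needed. For example,
// narrowing an s64 G_SHL by 40 to s32 halves yields a zero low half and a
// high half equal to the low input half shifted left by 40 - 32 = 8.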
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy,
                                             const LLT AmtTy) {
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  if (Amt.isZero()) {
    MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
  MI.eraseFromParent();
  return Legalized;
}
// TODO: Optimize if constant shift amount.
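//
// For example, an s64 shift by an unknown amount narrowed to s32 halves
// computes both candidate results and selects at runtime. For G_SHL:
//   short (Amt < 32):  Lo = InL << Amt, Hi = (InH << Amt) | (InL >> (32 - Amt))
//   long  (Amt >= 32): Lo = 0,          Hi = InL << (Amt - 32)
// with an extra select so a zero amount passes the input through unchanged.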
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
    return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
                                       ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
    }

    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});     // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, I);
  }

  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);
  Observer.changedInstr(MI);
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FSHR:
  case TargetOpcode::G_FSHL: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES:
    return UnableToLegalize;
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
  case TargetOpcode::G_BUILD_VECTOR: {
    SmallVector<SrcOp, 8> Elts;
    for (auto Op : MI.uses()) {
      Elts.push_back(Op.getReg());
    }

    for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
      Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
    }

    MIRBuilder.buildDeleteTrailingVectorElements(
        MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_TRUNC: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  default:
    return UnableToLegalize;
  }
}
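
// Widening a shuffle remaps mask indices that referred to the second input
// past the widened first input and fills the new trailing lanes with undef.
// For example, widening a <2 x s32> shuffle with mask <1, 2> to <4 x s32>
// gives mask <1, 4, -1, -1>.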
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
                                           unsigned int TypeIdx, LLT MoreTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  LLT DstTy = MRI.getType(DstReg);
  LLT Src1Ty = MRI.getType(Src1Reg);
  LLT Src2Ty = MRI.getType(Src2Reg);
  unsigned NumElts = DstTy.getNumElements();
  unsigned WidenNumElts = MoreTy.getNumElements();

  // Expect a canonicalized shuffle.
  if (DstTy != Src1Ty || DstTy != Src2Ty)
    return UnableToLegalize;

  moreElementsVectorSrc(MI, MoreTy, 1);
  moreElementsVectorSrc(MI, MoreTy, 2);

  // Adjust mask based on new input vector length.
  SmallVector<int, 16> NewMask;
  for (unsigned I = 0; I != NumElts; ++I) {
    int Idx = Mask[I];
    if (Idx < static_cast<int>(NumElts))
      NewMask.push_back(Idx);
    else
      NewMask.push_back(Idx - NumElts + WidenNumElts);
  }
  for (unsigned I = NumElts; I != WidenNumElts; ++I)
    NewMask.push_back(-1);
  moreElementsVectorDst(MI, MoreTy, 0);
  MIRBuilder.setInstrAndDebugLoc(MI);
  MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
                                MI.getOperand(1).getReg(),
                                MI.getOperand(2).getReg(), NewMask);
  MI.eraseFromParent();
  return Legalized;
}
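
// Schoolbook multiplication of register pieces. For a 2-part split (e.g. an
// s64 multiply done in s32 halves), the loop below computes:
//   Dst0 = mul(Src1_0, Src2_0)
//   Dst1 = mul(Src1_1, Src2_0) + mul(Src1_0, Src2_1) + umulh(Src1_0, Src2_0)
// i.e. the low parts of the column's cross products plus the high parts
// carried over from the previous column.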
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  Register CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
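
// For example, narrowing an s64 G_ADD to s32 pieces chains the carry through
// the pieces (illustrative MIR, register names are examples only):
//   %lo:_(s32), %c1:_(s1) = G_UADDO %a_lo, %b_lo
//   %hi:_(s32), %c2:_(s1) = G_UADDE %a_hi, %b_hi, %c1
//   %sum:_(s64) = G_MERGE_VALUES %lo:_(s32), %hi:_(s32)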
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstType = MRI.getType(DstReg);
  // FIXME: add support for vector types
  if (DstType.isVector())
    return UnableToLegalize;

  unsigned Opcode = MI.getOpcode();
  unsigned OpO, OpE, OpF;
  switch (Opcode) {
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_ADD:
    OpO = TargetOpcode::G_UADDO;
    OpE = TargetOpcode::G_UADDE;
    OpF = TargetOpcode::G_UADDE;
    if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
      OpF = TargetOpcode::G_SADDE;
    break;
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SUB:
    OpO = TargetOpcode::G_USUBO;
    OpE = TargetOpcode::G_USUBE;
    OpF = TargetOpcode::G_USUBE;
    if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
      OpF = TargetOpcode::G_SSUBE;
    break;
  default:
    llvm_unreachable("Unexpected add/sub opcode!");
  }

  // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
  unsigned NumDefs = MI.getNumExplicitDefs();
  Register Src1 = MI.getOperand(NumDefs).getReg();
  Register Src2 = MI.getOperand(NumDefs + 1).getReg();
  Register CarryDst, CarryIn;
  if (NumDefs == 2)
    CarryDst = MI.getOperand(1).getReg();
  if (MI.getNumOperands() == NumDefs + 3)
    CarryIn = MI.getOperand(NumDefs + 2).getReg();

  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
  LLT LeftoverTy, DummyTy;
  SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
  extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
  extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);

  int NarrowParts = Src1Regs.size();
  for (int I = 0, E = Src1Left.size(); I != E; ++I) {
    Src1Regs.push_back(Src1Left[I]);
    Src2Regs.push_back(Src2Left[I]);
  }
  DstRegs.reserve(Src1Regs.size());

  for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
    Register DstReg =
        MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
    Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    // Forward the final carry-out to the destination register
    if (i == e - 1 && CarryDst)
      CarryOut = CarryDst;

    if (!CarryIn) {
      MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i]});
    } else if (i == e - 1) {
      MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i], CarryIn});
    } else {
      MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i], CarryIn});
    }

    DstRegs.push_back(DstReg);
    CarryIn = CarryOut;
  }
  insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
              makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
              makeArrayRef(DstRegs).drop_front(NarrowParts));

  MI.eraseFromParent();
  return Legalized;
}
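
// Narrow a scalar multiply by multiplying the pieces. For G_UMULH, twice as
// many temporary result pieces are computed and only the high half is kept.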

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1 = MI.getOperand(1).getReg();
  Register Src2 = MI.getOperand(2).getReg();

  LLT Ty = MRI.getType(DstReg);
  if (Ty.isVector())
    return UnableToLegalize;

  unsigned Size = Ty.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  unsigned NumParts = Size / NarrowSize;
  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
  unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
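  // multiplyRegisters computes the full double-width product for G_UMULH, so
  // twice as many temporary parts are needed: an s64 G_UMULH narrowed to s32
  // produces four s32 partial results, of which only the top two (the high
  // half of the 128-bit product) are merged into the destination below.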
  SmallVector<Register, 2> Src1Parts, Src2Parts;
  SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
  extractParts(Src1, NarrowTy, NumParts, Src1Parts);
  extractParts(Src2, NarrowTy, NumParts, Src2Parts);
  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);

  // Take only the high half of the registers if this is a high mul.
  ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
  MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;

  Register Src = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src);

  // If all finite floats fit into the narrowed integer type, we can just swap
  // out the result type. This is practically only useful for conversions from
  // half to at least 16-bits, so just handle the one case.
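  // E.g. a G_FPTOSI producing s64 from an f16 source can produce s32 instead
  // and G_SEXT the result: the largest finite half is 65504, which fits in
  // 17 signed (16 unsigned) bits, so no in-range value changes.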
  if (SrcTy.getScalarType() != LLT::scalar(16) ||
      NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
    return UnableToLegalize;

  Observer.changingInstr(MI);
  narrowScalarDst(MI, NarrowTy, 0,
                  IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
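  // Each narrow source part contributes the slice of itself that overlaps the
  // extracted range. E.g. extracting s16 at offset 24 from an s64 split into
  // s32 parts takes the top 8 bits of part 0 and the bottom 8 bits of part 1,
  // and merges the two s8 segments back together.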
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else if (DstRegs.size() > 1)
    MIRBuilder.buildMerge(DstReg, DstRegs);
  else
    MIRBuilder.buildCopy(DstReg, DstRegs[0]);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
  LLT LeftoverTy;
  extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
               LeftoverRegs);

  for (Register Reg : LeftoverRegs)
    SrcRegs.push_back(Reg);

  uint64_t NarrowSize = NarrowTy.getSizeInBits();
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
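  // This is the mirror image of narrowScalarExtract: each narrow destination
  // part is forwarded unchanged if it doesn't overlap the inserted range,
  // replaced wholesale if the insert covers it exactly, or patched with a
  // G_INSERT of the overlapping slice of OpReg otherwise.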
  for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
    unsigned DstStart = I * NarrowSize;

    if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    Register SrcReg = SrcRegs[I];
    if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
      // The leftover reg is smaller than NarrowTy, so we need to extend it.
      SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
    }

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  uint64_t WideSize = DstRegs.size() * NarrowSize;
  Register DstReg = MI.getOperand(0).getReg();
  if (WideSize > RegTy.getSizeInBits()) {
    Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
    MIRBuilder.buildMerge(MergeReg, DstRegs);
    MIRBuilder.buildTrunc(DstReg, MergeReg);
  } else
    MIRBuilder.buildMerge(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");
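  // Emitting the operation part-wise with no cross-part data flow is only
  // sound for opcodes where each result bit depends solely on the
  // corresponding source bits, e.g. the bitwise logic ops: an s96 G_AND with
  // s32 NarrowTy becomes three independent s32 G_ANDs.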
  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                      {Src0Regs[I], Src1Regs[I]});
    DstRegs.push_back(Inst.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(
        MI.getOpcode(),
        {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
                                 LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  SmallVector<Register, 8> Parts;
  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register CondReg = MI.getOperand(1).getReg();
  LLT CondTy = MRI.getType(CondReg);
  if (CondTy.isVector()) // TODO: Handle vselect
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");
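  // A scalar condition selects between whole values, so it can be reused
  // unchanged for every part: each piece of the result is a G_SELECT between
  // the corresponding pieces of the two source values.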
  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(NarrowTy,
                                         CondReg, Src1Regs[I], Src2Regs[I]);
    DstRegs.push_back(Select.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(
        LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
    DstLeftoverRegs.push_back(Select.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
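    // E.g. for s64 narrowed to s32, ctlz(0x00000000'0000FFFF) takes the
    // Hi == 0 arm: 32 + ctlz(0x0000FFFF) = 32 + 16 = 48.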
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(1), C_0);
    auto LoCTLZ = IsUndef ?
      B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
      B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
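    // E.g. for s64 narrowed to s32, cttz(0xFFFF0000'00000000) takes the
    // Lo == 0 arm: cttz(0xFFFF0000) + 32 = 16 + 32 = 48.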
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(0), C_0);
    auto HiCTTZ = IsUndef ?
      B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
      B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
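    // Unlike ctlz/cttz this needs no select: the population count of a
    // 2*NarrowSize value is simply the sum of the counts of its two halves.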
    auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));

    auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
    auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
    MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const auto &TII = MIRBuilder.getTII();
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }

    // For now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >> 16);
    // x = x | (x >> 32); // for 64-bit input
    // up to NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
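    // Worked example for Len = 32 and x = 0x08000000 (bit 27 set): the
    // or-shift cascade smears the highest set bit into every lower position,
    // giving x = 0x0FFFFFFF with popcount 28, so ctlz = 32 - 28 = 4.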
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }

    // For now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x - 1)); }
    // Ref: "Hacker's Delight" by Henry Warren
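    // E.g. x = 0b1000: x - 1 = 0b0111 and ~x & (x - 1) = 0b0111, so
    // popcount gives 3 = cttz(x).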
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. The default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use the following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives the same result in blocks of 2 with one instruction less.
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // To get the count in blocks of 4, add the values from adjacent blocks
    // of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For the count in blocks of 8 bits we don't have to mask the high 4 bits
    // before the addition, since the count value sits in the range {0,...,8}
    // and 4 bits are enough to hold such values. After the addition the high
    // 4 bits still hold the count of set bits in the high 4-bit block; set
    // them to zero to get the 8-bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold the CTPOP result of a 128-bit int or smaller. A multiply
    // with this bitmask sets the 8 msb of ResTmp to the sum of all B8Counts
    // in the 8-bit blocks.
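    // E.g. for Size = 32 the mask is 0x01010101: B8Count * 0x01010101 places
    // the sum of the four per-byte counts in the top byte, and the final
    // shift right by Size - 8 = 24 moves that sum down to bit 0.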
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);

    // Shift the count result from the 8 high bits down to the low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}

// Check that (every element of) Reg is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
                                        Register Reg, unsigned BW) {
  return matchUnaryPredicate(
      MRI, Reg,
      [=](const Constant *C) {
        // Null constant here means an undef.
        const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
        return !CI || CI->getValue().urem(BW) != 0;
      },
      /*AllowUndefs*/ true);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  unsigned BW = Ty.getScalarSizeInBits();

  if (!isPowerOf2_32(BW))
    return UnableToLegalize;

  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl X, Y, Z -> fshr X, Y, -Z
    // fshr X, Y, Z -> fshl X, Y, -Z
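    // E.g. with BW = 32, fshl X, Y, 5 == fshr X, Y, 27: shift amounts are
    // taken modulo BW and -5 mod 32 == 27.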
    auto Zero = MIRBuilder.buildConstant(ShTy, 0);
    Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
  } else {
    // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
    // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
    } else {
      X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
    }

    Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
  }

  MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  const unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;

  Register ShX, ShY;
  Register ShAmt, InvShAmt;

  // FIXME: Emit optimized urem by constant instead of letting it expand later.
  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
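    // E.g. fshl with BW = 32 and C = 8: (X << 8) | (Y >> 24), i.e. the low
    // 24 bits of X land in bits 8..31 and the high 8 bits of Y fill bits
    // 0..7.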
    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
    } else {
      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
    }

    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
    } else {
      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
    }
  }

  MIRBuilder.buildOr(Dst, ShX, ShY);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
  // These operations approximately do the following (while avoiding undefined
  // shifts by BW):
  // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(MI.getOperand(3).getReg());

  bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  // TODO: Use smarter heuristic that accounts for vector legalization.
  if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
    return lowerFunnelShiftAsShifts(MI);

  // This only works for powers of 2; fall back to shifts if it fails.
  LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
  if (Result == UnableToLegalize)
    return lowerFunnelShiftAsShifts(MI);
  return Result;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Amt = MI.getOperand(2).getReg();
  LLT AmtTy = MRI.getType(Amt);
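  // Rotation is modular: rotl(X, Amt) == rotr(X, BW - Amt) == rotr(X, -Amt)
  // (mod BW), so the reverse rotate of the negated amount gives the same
  // result. The caller only takes this path when BW is a power of two, so
  // the negation wraps to the right amount.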
  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
  MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Amt = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  LLT AmtTy = MRI.getType(Amt);

  unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;

  MIRBuilder.setInstrAndDebugLoc(MI);

  // If a rotate in the other direction is supported, use it.
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
      isPowerOf2_32(EltSizeInBits))
    return lowerRotateWithReverseRotate(MI);

  // If a funnel shift is supported, use it.
  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  bool IsFShLegal = false;
  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
    auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
                                Register R3) {
      MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
      MI.eraseFromParent();
      return Legalized;
    };
    // If a funnel shift in the other direction is supported, use it.
    if (IsFShLegal) {
      return buildFunnelShift(FShOpc, Dst, Src, Amt);
    } else if (isPowerOf2_32(EltSizeInBits)) {
      Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
      return buildFunnelShift(RevFsh, Dst, Src, Amt);
    }
  }

  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
  unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
  auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
  Register ShVal;
  Register RevShiftVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
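    // E.g. rotl x, 35 with w = 32: 35 & 31 == 3 and -35 & 31 == 29, giving
    // (x << 3) | (x >> 29); over-wide amounts are reduced for free by the
    // masking.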
    auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
    auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
    auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
    auto One = MIRBuilder.buildConstant(AmtTy, 1);
    auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
  }
  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
  MI.eraseFromParent();
  return Legalized;
}

// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }
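  // Sanity check with u = 1: lz = 63 and e = 127 + 63 - 63 = 127; the
  // shifted value masks to 0, so v = 127 << 23 = 0x3f800000 = 1.0f, and
  // t = 0 means no rounding increment is added.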
  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy == LLT::scalar(1)) {
    auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != LLT::scalar(64))
    return UnableToLegalize;

  if (DstTy == LLT::scalar(32)) {
    // TODO: SelectionDAG has several alternative expansions to port which may
    // be more reasonable depending on the available instructions. If a target
    // has sitofp, does not have CTLZ, or can efficiently use f64 as an
    // intermediate type, this is probably worse.
    return lowerU64ToF32BitOps(MI);
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  if (SrcTy == S1) {
    auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != S64)
    return UnableToLegalize;

  if (DstTy == S32) {
    // signed cl2f(long l) {
    //   long s = l >> 63;
    //   float r = cul2f((l + s) ^ s);
    //   return s ? -r : r;
    // }
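    // (l + s) ^ s computes |l| branchlessly: for l = -5, s = -1 and
    // (l + s) ^ s = (-6) ^ -1 = 5; the final select then restores the sign.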
    Register L = Src;
    auto SignBit = MIRBuilder.buildConstant(S64, 63);
    auto S = MIRBuilder.buildAShr(S64, L, SignBit);

    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
    auto R = MIRBuilder.buildUITOFP(S32, Xor);

    auto RNeg = MIRBuilder.buildFNeg(S32, R);
    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
                                            MIRBuilder.buildConstant(S64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives the same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater than or equal to 2^31 for an i32 result, or 2^63 for an i64
  // result. For brevity, 2^Exp.
  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getZero(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);

  // For fp Value greater than or equal to Threshold (2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting the highest bit in the result
  // to 1.
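  // E.g. converting the double 3e9 to i32: 3e9 >= 2^31, so the select below
  // picks Res = FPTOSI(3e9 - 2^31) ^ 0x80000000 = 852516352 ^ 0x80000000,
  // which is the unsigned value 3000000000.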
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
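  // The significand (with the implicit leading 1 made explicit) is shifted
  // left or right depending on whether the unbiased exponent exceeds 23.
  // E.g. 1.0f = 0x3f800000: Exponent = 127 - 127 = 0, so the result is
  // R = 0x00800000 >> 23 = 1.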
  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);
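  // U holds the low 32 bits of the f64; UH holds the high 32 bits with the
  // sign, the 11 exponent bits, and the top 20 bits of the mantissa.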
  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = M | (E << 12);
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = clamp(1-E, 0, 13);
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit.
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S16 = LLT::scalar(16);

  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
    return lowerFPTRUNC_F64_TO_F16(MI);

  return UnableToLegalize;
}

// TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
// multiplication tree.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
  MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_SMIN:
    return CmpInst::ICMP_SLT;
  case TargetOpcode::G_SMAX:
    return CmpInst::ICMP_SGT;
  case TargetOpcode::G_UMIN:
    return CmpInst::ICMP_ULT;
  case TargetOpcode::G_UMAX:
    return CmpInst::ICMP_UGT;
  default:
    llvm_unreachable("not in integer min/max");
  }
}
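
// Integer min/max lower to a compare and a select, e.g.
//   G_SMIN a, b  ->  (a <s b) ? a : b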
LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
  LLT CmpType = MRI.getType(Dst).changeElementSize(1);

  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const LLT Src0Ty = MRI.getType(Src0);
  const LLT Src1Ty = MRI.getType(Src1);

  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();
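
  // The result is (Src0 with its sign bit cleared) OR (the sign bit of Src1),
  // with Src1's sign bit shifted into Src0's sign position when the two
  // types differ in width.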
  auto SignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getSignMask(Src0Size));

  auto NotSignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
  Register And1;
  if (Src0Ty == Src1Ty) {
    And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
  } else if (Src0Size > Src1Size) {
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
  } else {
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  unsigned Flags = MI.getFlags();
  MIRBuilder.buildOr(Dst, And0, And1, Flags);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.
    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicated quiet-snan instruction, as we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!isKnownNeverSNaN(Src0, MRI))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!isKnownNeverSNaN(Src1, MRI))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
  // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  unsigned Flags = MI.getFlags();

  auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
                                  Flags);
  MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // round(x) =>
  //  t = trunc(x);
  //  d = fabs(x - t);
  //  o = copysign(1.0f, x);
  //  return t + (d >= 0.5 ? o : 0.0);
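  // Halfway cases round away from zero: round(2.5) has t = 2, d = 0.5,
  // o = 1 -> 3.0; round(-2.5) has t = -2, d = 0.5, o = -1 -> -3.0.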
  auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
  auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
  auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
  auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
  auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);

  auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
                                  Flags);
  auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);

  MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFFloor(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  unsigned Flags = MI.getFlags();
  LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // result = trunc(src);
  // if (src < 0.0 && src != result)
  //   result += -1.0.
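  // The G_AND of the two i1 conditions is fed through G_SITOFP, which maps
  // true (-1 as a signed i1) to -1.0 and false to 0.0, so the adjustment is
  // branchless: floor(-1.5) = trunc(-1.5) + (-1.0) = -2.0.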
  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);

  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
  auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
                                  SrcReg, Zero, Flags);
  auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
                                      SrcReg, Trunc, Flags);
  auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);

  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  const unsigned NumOps = MI.getNumOperands();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);
  unsigned PartSize = SrcTy.getSizeInBits();

  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
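  // Each further part is zero-extended, shifted into position, and ORed in.
  // E.g. merging four s8 parts p0..p3 into an s32 computes
  //   zext(p0) | (zext(p1) << 8) | (zext(p2) << 16) | (zext(p3) << 24)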
  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  if (DstTy.isPointer()) {
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
          DstTy.getAddressSpace())) {
      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
      return UnableToLegalize;
    }

    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (DstTy.isPointer())
    return UnableToLegalize; // TODO

  SrcReg = coerceToScalar(SrcReg);
  if (!SrcReg)
    return UnableToLegalize;

  // Expand scalarizing unmerge as bitcast to integer and shift.
  5679. LLT IntTy = MRI.getType(SrcReg);
  5680. MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
  5681. const unsigned DstSize = DstTy.getSizeInBits();
  5682. unsigned Offset = DstSize;
  5683. for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
  5684. auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
  5685. auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
  5686. MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
  5687. }
  5688. MI.eraseFromParent();
  5689. return Legalized;
  5690. }

/// Lower a vector extract or insert by writing the vector to a stack temporary
/// and reloading the element or vector.
///
/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
///  =>
///  %stack_temp = G_FRAME_INDEX
///  G_STORE %vec, %stack_temp
///  %idx = clamp(%idx, %vec.getNumElements())
///  %element_ptr = G_PTR_ADD %stack_temp, %idx
///  %dst = G_LOAD %element_ptr
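///
/// G_INSERT_VECTOR_ELT is handled the same way, except the new element is
/// stored over the computed slot before the whole vector is reloaded:
///  G_STORE %val, %element_ptr
///  %dst = G_LOAD %stack_temp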
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  unsigned NumElts = VecTy.getNumElements();

  int64_t IdxVal;
  // Note: an index equal to NumElts would run off the end of SrcRegs below,
  // so only the strictly in-bounds constant case takes this fast path.
  if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal < NumElts) {
    SmallVector<Register, 8> SrcRegs;
    extractParts(SrcVec, EltTy, NumElts, SrcRegs);

    if (InsertVal) {
      SrcRegs[IdxVal] = MI.getOperand(2).getReg();
      MIRBuilder.buildMerge(DstReg, SrcRegs);
    } else {
      MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  if (!EltTy.isByteSized()) { // Not implemented.
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
                                        VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // We lose information with a variable offset.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Write the inserted element
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);

    // Reload the whole vector.
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}
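
// Lower G_SHUFFLE_VECTOR by scalarizing it into per-lane extracts that feed a
// G_BUILD_VECTOR. For illustration, shuffling two <2 x s32> sources with mask
// <1, 3> extracts lane 1 of the first source and lane 1 (= 3 - 2) of the
// second; negative mask entries become undef lanes.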
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElt(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  LLT PtrTy = MRI.getType(Dst);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
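  //
  // For illustration, with a 16-byte alignment on a 32-bit stack pointer the
  // mask built below is ~15 = 0xFFFFFFF0, so the net effect is
  //   SP = (SP - AllocSize) & 0xFFFFFFF0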
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Alignment > Align(1)) {
    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}
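
// Lower G_EXTRACT either by unmerging whole source elements, or, for a scalar
// result, with a shift/truncate pair. Illustrative sketch: extracting an s32
// at bit offset 32 from an s64 %x becomes:
//   %dst = G_TRUNC (G_LSHR %x, 32)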
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  unsigned Offset = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  // Extract sub-vector or one element
  if (SrcTy.isVector()) {
    unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
    unsigned DstSize = DstTy.getSizeInBits();

    if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
        (Offset + DstSize <= SrcTy.getSizeInBits())) {
      // Unmerge and allow access to each Src element for the artifact combiner.
      auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src);

      // Take element(s) we need to extract and copy it (merge them).
      SmallVector<Register, 8> SubVectorElts;
      for (unsigned Idx = Offset / SrcEltSize;
           Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
        SubVectorElts.push_back(Unmerge.getReg(Idx));
      }
      if (SubVectorElts.size() == 1)
        MIRBuilder.buildCopy(Dst, SubVectorElts[0]);
      else
        MIRBuilder.buildMerge(Dst, SubVectorElts);

      MI.eraseFromParent();
      return Legalized;
    }
  }

  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
    }

    if (Offset == 0)
      MIRBuilder.buildTrunc(Dst, Src);
    else {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
      MIRBuilder.buildTrunc(Dst, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
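
// Lower G_INSERT by rebuilding vectors element-wise, or, for scalars, by
// masking out the destination field and ORing in the shifted new bits.
// Illustrative sketch: inserting an s16 %v into an s32 %x at offset 16 builds
// the keep-mask 0x0000FFFF and becomes:
//   %dst = G_OR (G_AND %x, 0xFFFF), (G_SHL (G_ZEXT %v), 16)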
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  // Insert sub-vector or one element
  if (DstTy.isVector() && !InsertTy.isPointer()) {
    LLT EltTy = DstTy.getElementType();
    unsigned EltSize = EltTy.getSizeInBits();
    unsigned InsertSize = InsertTy.getSizeInBits();

    if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
        (Offset + InsertSize <= DstTy.getSizeInBits())) {
      auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
      SmallVector<Register, 8> DstElts;
      unsigned Idx = 0;
      // Elements from Src before insert start Offset
      for (; Idx < Offset / EltSize; ++Idx) {
        DstElts.push_back(UnmergeSrc.getReg(Idx));
      }

      // Replace elements in Src with elements from InsertSrc
      if (InsertTy.getSizeInBits() > EltSize) {
        auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
        for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
             ++Idx, ++i) {
          DstElts.push_back(UnmergeInsertSrc.getReg(i));
        }
      } else {
        DstElts.push_back(InsertSrc);
        ++Idx;
      }

      // Remaining elements from Src after insert
      for (; Idx < DstTy.getNumElements(); ++Idx) {
        DstElts.push_back(UnmergeSrc.getReg(Idx));
      }

      MIRBuilder.buildMerge(Dst, DstElts);
      MI.eraseFromParent();
      return Legalized;
    }
  }

  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  LLT IntDstTy = DstTy;

  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  Register Dst0 = MI.getOperand(0).getReg();
  Register Dst1 = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;

  LLT Ty = MRI.getType(Dst0);
  LLT BoolTy = MRI.getType(Dst1);

  if (IsAdd)
    MIRBuilder.buildAdd(Dst0, LHS, RHS);
  else
    MIRBuilder.buildSub(Dst0, LHS, RHS);

  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.

  auto Zero = MIRBuilder.buildConstant(Ty, 0);

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
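  //
  // Illustrative i8 example for the addition case: 100 + 50 wraps to -106, so
  // (result < LHS) is true while (RHS < 0) is false; the XOR of the two
  // comparisons correctly reports the overflow.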
  auto ResultLowerThanLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
  auto ConditionRHS = MIRBuilder.buildICmp(
      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);

  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
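    //
    // Worked i8 example for sadd.sat(100, 50): hi = 127 - smax(100, 0) = 27
    // and lo = -128 - smin(100, 0) = -128, so b is clamped to [-128, 27],
    // giving 27, and 100 + 27 = 127, the saturated result.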
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }
    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);
  bool IsSigned;
  bool IsAdd;
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_UADDO;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_SADDO;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_USUBO;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_SSUBO;
    break;
  }

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MachineInstrBuilder Clamp;
  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   {tmp, ov} = saddo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // ssub.sat(a, b) ->
    //   {tmp, ov} = ssubo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
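    //
    // The sign of the wrapped result picks the saturation constant: for s32,
    // tmp >>s 31 is 0 or -1, so adding 0x80000000 yields INT_MIN when the
    // wrapped result was non-negative and INT_MAX when it was negative.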
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  } else {
    // uadd.sat(a, b) ->
    //   {tmp, ov} = uaddo(a, b)
    //   ov ? 0xffffffff : tmp
    // usub.sat(a, b) ->
    //   {tmp, ov} = usubo(a, b)
    //   ov ? 0 : tmp
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  }
  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
  MI.eraseFromParent();
  return Legalized;
}
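
// Lower G_SSHLSAT/G_USHLSAT by shifting left, shifting back, and comparing:
// if the round trip does not reproduce the operand, the shift overflowed and
// the saturation value is selected instead. Illustrative u8 example:
// ushl.sat(0x40, 2) shifts to 0x00, and 0x00 >> 2 != 0x40, so the result
// saturates to 0xFF.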
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShlSat(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);
  unsigned BW = Ty.getScalarSizeInBits();

  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
                       : MIRBuilder.buildLShr(Ty, Result, RHS);

  MachineInstrBuilder SatVal;
  if (IsSigned) {
    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
    auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
                                    MIRBuilder.buildConstant(Ty, 0));
    SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
  } else {
    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
  }
  auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);

  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
  MI.eraseFromParent();
  return Legalized;
}
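
// Lower G_BSWAP with shifts, masks, and ORs. Illustrative s32 expansion:
//   bswap(x) = (x << 24) | (x >> 24)
//            | ((x & 0x0000FF00) << 8) | ((x >> 8) & 0x0000FF00)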
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;

  // Swap most and least significant byte, set remaining bytes in Res to zero.
  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);

  // Set i-th high/low byte in Res to i-th low/high byte from Src.
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
    APInt APMask(SizeInBytes * 8, 0xFFULL << (i * 8));
    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
    // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
    // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
  }
  Res.getInstr()->getOperand(0).setReg(Dst);

  MI.eraseFromParent();
  return Legalized;
}

//{ (Src & Mask) >> N } | { (Src << N) & Mask }
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
                                 MachineInstrBuilder Src, APInt Mask) {
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  MachineInstrBuilder C_N = B.buildConstant(Ty, N);
  MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
  return B.buildOr(Dst, LHS, RHS);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getSizeInBits();

  MachineInstrBuilder BSWAP =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
  //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
  MachineInstrBuilder Swap4 =
      SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
  MachineInstrBuilder Swap2 =
      SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
  SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
  MachineFunction &MF = MIRBuilder.getMF();
  bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  int NameOpIdx = IsRead ? 1 : 0;
  int ValRegIndex = IsRead ? 0 : 1;

  Register ValReg = MI.getOperand(ValRegIndex).getReg();
  const LLT Ty = MRI.getType(ValReg);
  const MDString *RegStr = cast<MDString>(
      cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));

  Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
  if (!PhysReg.isValid())
    return UnableToLegalize;

  if (IsRead)
    MIRBuilder.buildCopy(ValReg, PhysReg);
  else
    MIRBuilder.buildCopy(PhysReg, ValReg);

  MI.eraseFromParent();
  return Legalized;
}
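
// Lower G_SMULH/G_UMULH by widening: extend both operands to twice the width,
// multiply, shift the product down by the original bit width, and truncate.
// Illustrative sketch for smulh on s32:
//   %h = G_TRUNC (G_ASHR (G_MUL (G_SEXT %a), (G_SEXT %b)), 32)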
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Register Result = MI.getOperand(0).getReg();
  LLT OrigTy = MRI.getType(Result);
  auto SizeInBits = OrigTy.getScalarSizeInBits();
  LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);

  auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
  auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
  auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
  unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
  auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
  MIRBuilder.buildTrunc(Result, Shifted);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
  // Implement vector G_SELECT in terms of XOR, AND, OR.
  Register DstReg = MI.getOperand(0).getReg();
  Register MaskReg = MI.getOperand(1).getReg();
  Register Op1Reg = MI.getOperand(2).getReg();
  Register Op2Reg = MI.getOperand(3).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT MaskTy = MRI.getType(MaskReg);
  LLT Op1Ty = MRI.getType(Op1Reg);
  if (!DstTy.isVector())
    return UnableToLegalize;

  // Vector selects can have a scalar predicate. If so, splat into a vector and
  // finish for later legalization attempts to try again.
  if (MaskTy.isScalar()) {
    Register MaskElt = MaskReg;
    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
      MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
    // Generate a vector splat idiom to be pattern matched later.
    auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(ShufSplat.getReg(0));
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
    return UnableToLegalize;
  }

  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
  MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
  // Split DIVREM into individual instructions.
  unsigned Opcode = MI.getOpcode();

  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
                                        : TargetOpcode::G_UDIV,
      {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
                                        : TargetOpcode::G_UREM,
      {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
  // Expand %res = G_ABS %a into:
  // %v1 = G_ASHR %a, scalar_size-1
  // %v2 = G_ADD %a, %v1
  // %res = G_XOR %v2, %v1
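  //
  // Illustrative i8 example: for %a = -5, %v1 = -5 >> 7 = -1 (all ones),
  // %v2 = -5 + -1 = -6, and -6 ^ -1 = 5; for non-negative %a the shift
  // yields 0 and the add/xor are no-ops.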
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  Register OpReg = MI.getOperand(1).getReg();
  auto ShiftAmt =
      MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
  auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
  auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
  MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
  // Expand %res = G_ABS %a into:
  // %v1 = G_CONSTANT 0
  // %v2 = G_SUB %v1, %a
  // %res = G_SMAX %a, %v2
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
  auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
  MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // The source could be a scalar if the IR type was <1 x sN>.
  if (SrcTy.isScalar()) {
    if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
      return UnableToLegalize; // FIXME: handle extension.
    // This can be just a plain copy.
    Observer.changingInstr(MI);
    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
    Observer.changedInstr(MI);
    return Legalized;
  }
  return UnableToLegalize;
}

static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
  // On Darwin, -Os means optimize for size without hurting performance, so
  // only really optimize for size when -Oz (MinSize) is used.
  if (MF.getTarget().getTargetTriple().isOSDarwin())
    return MF.getFunction().hasMinSize();
  return MF.getFunction().hasOptSize();
}

// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                          unsigned Limit, const MemOp &Op,
                                          unsigned DstAS, unsigned SrcAS,
                                          const AttributeList &FuncAttributes,
                                          const TargetLowering &TLI) {
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    Ty = LLT::scalar(64);
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
        Ty = LLT::scalar(Ty.getSizeInBytes());
    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned TySize = Ty.getSizeInBytes();
    while (TySize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      LLT NewTy = Ty;
      // FIXME: check for mem op safety and legality of the types. Not all of
      // SDAGisms map cleanly to GISel concepts.
      if (NewTy.isVector())
        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
      NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
      unsigned NewTySize = NewTy.getSizeInBytes();
      assert(NewTySize > 0 && "Could not find appropriate type");

      // If the new LLT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
      MVT VT = getMVTForLLT(Ty);
      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
          TLI.allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}

static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}

// Get a vectorized representation of the memset value operand, GISel edition.
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  unsigned NumBits = Ty.getScalarSizeInBits();
  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
  if (!Ty.isVector() && ValVRegAndVal) {
    APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
    APInt SplatVal = APInt::getSplat(NumBits, Scalar);
    return MIB.buildConstant(Ty, SplatVal).getReg(0);
  }

  // Extend the byte value to the larger type, and then multiply by a magic
  // value 0x010101... in order to replicate it across every byte.
  // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
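  //
  // For illustration, an s64 memset value of 0x2A becomes
  // 0x2A * 0x0101010101010101 = 0x2A2A2A2A2A2A2A2A.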
  if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
    return MIB.buildConstant(Ty, 0).getReg(0);
  }

  LLT ExtType = Ty.getScalarType();
  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
  if (NumBits > 8) {
    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
    auto MagicMI = MIB.buildConstant(ExtType, Magic);
    Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
  }

  // For vector types create a G_BUILD_VECTOR.
  if (Ty.isVector())
    Val = MIB.buildSplatVector(Ty, Val).getReg(0);

  return Val;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
                             uint64_t KnownLen, Align Alignment,
                             bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memset length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();

  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  MachineIRBuilder MIB(MI);
  // Find the largest store and generate the bit pattern for it.
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
      LargestTy = MemOps[i];

  // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return UnableToLegalize;

  // Generate the stores. For each store type in the list, we generate the
  // matching store of that type to the destination address.
  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
      // Issuing an unaligned load / store pair that overlaps with the previous
      // pair. Adjust the offset accordingly.
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }

    // If this store is smaller than the largest store see whether we can get
    // the smaller value for free with a truncate.
    Register Value = MemSetValue;
    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
      MVT VT = getMVTForLLT(Ty);
      MVT LargestVT = getMVTForLLT(LargestTy);
      if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return UnableToLegalize;
    }

    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);

    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }

    MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);

  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Len = MI.getOperand(2).getReg();

  const auto *MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;
  bool IsVolatile = MemOp->isVolatile();

  // See if this is a constant length copy
  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
  // FIXME: support dynamically sized G_MEMCPY_INLINE
  assert(LenVRegAndVal.hasValue() &&
         "inline memcpy with dynamic size is not yet supported");
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  Align DstAlign = DstMMO.getBaseAlign();
  Align SrcAlign = SrcMMO.getBaseAlign();

  return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
                                   uint64_t KnownLen, Align DstAlign,
                                   Align SrcAlign, bool IsVolatile) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
  return lowerMemcpy(MI, Dst, Src, KnownLen,
                     std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
                     IsVolatile);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
                             uint64_t KnownLen, uint64_t Limit, Align DstAlign,
                             Align SrcAlign, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memcpy length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align Alignment = commonAlignment(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  // FIXME: infer better src pointer alignment like SelectionDAG does here.
  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
  // if the memcpy is in a tail call position.

  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      IsVolatile),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign / 2;

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Now we need to emit a pair of load and stores for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store to the dest buffer
  // of that value loaded. This can result in a sequence of loads and stores of
  // mixed types, depending on what the target specifies as good types to use.
  unsigned CurrOffset = 0;
  unsigned Size = KnownLen;
  for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;

    // Construct MMOs for the accesses.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    Register Offset;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
                   .getReg(0);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Create the store.
    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
                              uint64_t KnownLen, Align DstAlign, Align SrcAlign,
                              bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memmove length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = commonAlignment(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now do the
  // same thing here.
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      /*IsVolatile*/ true),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign / 2;

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Memmove requires that we perform the loads first before issuing the stores.
  // Apart from that, this loop is pretty much doing the same thing as the
  // memcpy codegen function.
  unsigned CurrOffset = 0;
  SmallVector<Register, 16> LoadVals;
  for (auto CopyTy : MemOps) {
    // Construct MMO for the load.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      auto Offset =
          MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  }

  CurrOffset = 0;
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
    // Now store the values loaded.
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      auto Offset =
          MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
  }
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  const unsigned Opc = MI.getOpcode();
  // This combine is fairly complex so it's not written with a separate
  // matcher function.
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET) &&
         "Expected memcpy like instruction");

  auto MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;

  Align DstAlign = MemOp->getBaseAlign();
  Align SrcAlign;
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Len = MI.getOperand(2).getReg();

  if (Opc != TargetOpcode::G_MEMSET) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *(++MMOIt);
    SrcAlign = MemOp->getBaseAlign();
  }

  // See if this is a constant length copy
  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
  if (!LenVRegAndVal)
    return UnableToLegalize;
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();

  if (KnownLen == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  bool IsVolatile = MemOp->isVolatile();
  if (Opc == TargetOpcode::G_MEMCPY_INLINE)
    return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                             IsVolatile);

  // Don't try to optimize volatile.
  if (IsVolatile)
    return UnableToLegalize;

  if (MaxLen && KnownLen > MaxLen)
    return UnableToLegalize;

  if (Opc == TargetOpcode::G_MEMCPY) {
    auto &MF = *MI.getParent()->getParent();
    const auto &TLI = *MF.getSubtarget().getTargetLowering();
    bool OptSize = shouldLowerMemFuncForSize(MF);
    uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
    return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
                       IsVolatile);
  }

  if (Opc == TargetOpcode::G_MEMMOVE)
    return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
  if (Opc == TargetOpcode::G_MEMSET)
    return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
  return UnableToLegalize;
}