//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <numeric>
#include <optional>

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;
using namespace MIPatternMatch;

/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
/// with any leftover piece as type \p LeftoverTy.
///
/// Returns -1 in the first element of the pair if the breakdown is not
/// satisfiable.
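///
/// For example, breaking s64 into s16 yields {4, 0}, while breaking s57 into
/// s16 yields {3, 1} with LeftoverTy = s9.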
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  assert(Size > NarrowSize);

  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (NarrowTy.isVector()) {
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}
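
/// Return the IR floating-point type with the same bit width as the scalar
/// \p Ty, or nullptr if \p Ty is not a scalar of a supported width.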
static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
  if (!Ty.isScalar())
    return nullptr;

  switch (Ty.getSizeInBits()) {
  case 16:
    return Type::getHalfTy(Ctx);
  case 32:
    return Type::getFloatTy(Ctx);
  case 64:
    return Type::getDoubleTy(Ctx);
  case 80:
    return Type::getX86_FP80Ty(Ctx);
  case 128:
    return Type::getFP128Ty(Ctx);
  default:
    return nullptr;
  }
}

LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) { }

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()) { }

LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
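
/// Split \p Reg into \p NumParts new registers of type \p Ty, appending them
/// to \p VRegs, via a single G_UNMERGE_VALUES. \p Ty must therefore evenly
/// divide the type of \p Reg.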
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  // Perform irregular split. Leftover is last element of RegPieces.
  if (MainTy.isVector()) {
    SmallVector<Register, 8> RegPieces;
    extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
    for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
      VRegs.push_back(RegPieces[i]);
    LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
    LeftoverTy = MRI.getType(LeftoverRegs[0]);
    return true;
  }

  LeftoverTy = LLT::scalar(LeftoverSize);
  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}
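
/// Split the vector \p Reg into sub-vectors of \p NumElts elements, appending
/// them to \p VRegs. If \p NumElts does not evenly divide the element count,
/// the final register holds the leftover elements (a single scalar when only
/// one element remains).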
void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
                                         SmallVectorImpl<Register> &VRegs) {
  LLT RegTy = MRI.getType(Reg);
  assert(RegTy.isVector() && "Expected a vector type");

  LLT EltTy = RegTy.getElementType();
  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
  unsigned RegNumElts = RegTy.getNumElements();
  unsigned LeftoverNumElts = RegNumElts % NumElts;
  unsigned NumNarrowTyPieces = RegNumElts / NumElts;

  // Perfect split without leftover.
  if (LeftoverNumElts == 0)
    return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);

  // Irregular split. Unmerge to individual elements first, so the artifact
  // combiner has direct access to all of them, then build sub-vectors with
  // NumElts elements. The remaining element(s) form the leftover.
  SmallVector<Register, 8> Elts;
  extractParts(Reg, EltTy, RegNumElts, Elts);

  unsigned Offset = 0;
  // Requested sub-vectors of NarrowTy.
  for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
    ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
    VRegs.push_back(MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
  }

  // Leftover element(s).
  if (LeftoverNumElts == 1) {
    VRegs.push_back(Elts[Offset]);
  } else {
    LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
    ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
    VRegs.push_back(
        MIRBuilder.buildMergeLikeInstr(LeftoverTy, Pieces).getReg(0));
  }
}
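
/// Recombine the \p PartTy-typed \p PartRegs, plus the optional
/// \p LeftoverTy-typed \p LeftoverRegs, into \p DstReg of type \p ResultTy.
/// This is the inverse of the leftover-aware extractParts.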
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  // Merge sub-vectors with different numbers of elements and insert into
  // DstReg.
  if (ResultTy.isVector()) {
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    SmallVector<Register, 8> AllRegs;
    for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
      AllRegs.push_back(Reg);
    return mergeMixedSubvectors(DstReg, AllRegs);
  }

  SmallVector<Register> GCDRegs;
  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
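
/// Unmerge the vector \p Reg into its individual scalar elements and append
/// them to \p Elts.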
void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
                                       Register Reg) {
  LLT Ty = MRI.getType(Reg);
  SmallVector<Register, 8> RegElts;
  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
  Elts.append(RegElts);
}

/// Merge \p PartRegs with different types into \p DstReg.
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
                                           ArrayRef<Register> PartRegs) {
  SmallVector<Register, 8> AllElts;
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  Register Leftover = PartRegs[PartRegs.size() - 1];
  if (MRI.getType(Leftover).isScalar())
    AllElts.push_back(Leftover);
  else
    appendVectorElts(AllElts, Leftover);

  MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
}

/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}
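
/// Unmerge \p SrcReg into pieces of type \p GCDTy and append them to
/// \p Parts. If \p SrcReg already has type \p GCDTy, it is appended directly.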
void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                     LLT GCDTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy == GCDTy) {
    // If the source already evenly divides the result type, we don't need to
    // do anything.
    Parts.push_back(SrcReg);
  } else {
    // Need to split into common type sized pieces.
    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    getUnmergeResults(Parts, *Unmerge);
  }
}

LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                                    LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
  extractGCDType(Parts, GCDTy, SrcReg);
  return GCDTy;
}
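
/// Merge the \p GCDTy-typed registers in \p VRegs into \p NarrowTy-sized
/// pieces covering the least common multiple of \p DstTy and \p NarrowTy.
/// If the sources do not evenly cover the LCM type, the remainder is padded
/// according to \p PadStrategy: G_ZEXT pads with zero, G_ANYEXT with undef,
/// and G_SEXT with copies of the sign bit of the last source. On return,
/// \p VRegs holds the \p NarrowTy-typed pieces, and the LCM type is returned.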
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can reuse
  // the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}

void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.

  if (DstTy == LCMTy) {
    MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    MIRBuilder.buildUnmerge(UnmergeDefs,
                            MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
    return;
  }

  llvm_unreachable("unhandled case");
}
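
/// Map a generic opcode and scalar \p Size to the corresponding
/// runtime-library call; e.g. G_FSIN with Size == 64 maps to RTLIB::SIN_F64.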
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  switch (Opcode) {
  case TargetOpcode::G_MUL:
    RTLIBCASE_INT(MUL_I);
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}

/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(MachineInstr &MI,
                                    const TargetInstrInfo &TII,
                                    MachineRegisterInfo &MRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return or if we
  // have a `thisreturn` callee, and a sequence like:
  //
  //   G_MEMCPY %0, %1, %2
  //   $x0 = COPY %0
  //   RET_ReallyLR implicit $x0
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next != MBB.instr_end() && Next->isCopy()) {
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("unsupported opcode");
    case TargetOpcode::G_BZERO:
      return false;
    case TargetOpcode::G_MEMCPY:
    case TargetOpcode::G_MEMMOVE:
    case TargetOpcode::G_MEMSET:
      break;
    }

    Register VReg = MI.getOperand(0).getReg();
    if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
      return false;

    Register PReg = Next->getOperand(0).getReg();
    if (!PReg.isPhysical())
      return false;

    auto Ret = next_nodbg(Next, MBB.instr_end());
    if (Ret == MBB.instr_end() || !Ret->isReturn())
      return false;

    if (Ret->getNumImplicitOperands() != 1)
      return false;

    if (PReg != Ret->getOperand(0).getReg())
      return false;

    // Skip over the COPY that we just validated.
    Next = Ret;
  }

  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}
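
/// Convenience overload that resolves the libcall name and calling convention
/// for \p Libcall from the target's TargetLowering.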
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);
  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
  return createLibcall(MIRBuilder, Name, Result, Args, CC);
}

// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // FIXME: What does the original arg index mean here?
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
    Args.push_back({MO.getReg(), OpType, 0});
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), OpType, 0}, Args);
}
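
/// Lower a G_BZERO, G_MEMCPY, G_MEMMOVE or G_MEMSET instruction to a call to
/// the corresponding library function, emitting it as a tail call when the
/// instruction's 'tail' immediate is set and the call sits in tail position.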
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
                    isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
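    // For example (illustrative, AArch64-flavored MIR): once the call has
    // been lowered as a tail call, the leftover
    //   $x0 = COPY %dst
    //   RET_ReallyLR implicit $x0
    // must be erased, since the tail call is now the block's terminator.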
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  return LegalizerHelper::Legalized;
}
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}
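// Emit a conversion libcall for MI. For example (illustrative; the actual
// symbol depends on the target's runtime library), an s32 -> s64 G_FPEXT maps
// to RTLIB::FPEXT_F32_F64, which is typically the compiler-rt/libgcc routine
// __extendsfdf2.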
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), ToType, 0},
                       {{MI.getOperand(1).getReg(), FromType, 0}});
}
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    LegalizeResult Result =
        createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
    if (Result != Legalized)
      return Result;
    MI.eraseFromParent();
    return Result;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // If SizeOp0 is not an exact multiple of NarrowSize, emit
    // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
    // FIXME: Although this would also be legal for the general case, it causes
    // a lot of regressions in the emitted code (superfluous COPYs, artifact
    // combines not being hit). This seems to be a problem related to the
    // artifact combiner.
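    // For example (illustrative), narrowing an s7 G_IMPLICIT_DEF with s4:
    //   %0:_(s7) = G_IMPLICIT_DEF
    // becomes:
    //   %1:_(s4) = G_IMPLICIT_DEF
    //   %0:_(s7) = G_ANYEXT %1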
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      if (DstTy.isVector())
        ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);

      Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
      MIRBuilder.buildAnyExt(DstReg, ImplicitReg);

      MI.eraseFromParent();
      return Legalized;
    }

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));

    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
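    // Split the constant into NarrowTy-sized pieces, least significant piece
    // first. For example (illustrative), an s64 constant narrowed with s32:
    //   %0:_(s64) = G_CONSTANT i64 0x1122334455667788
    // becomes:
    //   %lo:_(s32) = G_CONSTANT i32 0x55667788
    //   %hi:_(s32) = G_CONSTANT i32 0x11223344
    //   %0:_(s64) = G_MERGE_VALUES %lo, %hi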
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
          LeftoverTy,
          Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }
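    // e.g. (illustrative) an s64 -> s32 G_TRUNC narrows to an unmerge plus a
    // copy of the least significant half:
    //   %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %src:_(s64)
    //   %dst:_(s32) = COPY %1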
    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FREEZE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    // Should widen scalar first
    if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
      return UnableToLegalize;

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
    SmallVector<Register, 8> Parts;
    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
      Parts.push_back(
          MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
    }

    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    auto &LoadMI = cast<GLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      LoadMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    auto &LoadMI = cast<GExtLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = LoadMI.getMMO();
    unsigned MemSize = MMO.getSizeInBits();

    if (MemSize == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else if (MemSize < NarrowSize) {
      MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
    } else if (MemSize > NarrowSize) {
      // FIXME: Need to split the load.
      return UnableToLegalize;
    }

    if (isa<GZExtLoad>(LoadMI))
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    auto &StoreMI = cast<GStore>(MI);

    Register SrcReg = StoreMI.getValueReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
      StoreMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs(NumParts);
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);

    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2]);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    Register LHS = MI.getOperand(2).getReg();
    LLT SrcTy = MRI.getType(LHS);
    uint64_t SrcSize = SrcTy.getSizeInBits();
    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

    // TODO: Handle the non-equality case for weird sizes.
    if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
      return UnableToLegalize;

    LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
    SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
                      LHSLeftoverRegs))
      return UnableToLegalize;

    LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
    SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs))
      return UnableToLegalize;

    // We now have the LHS and RHS of the compare split into narrow-type
    // registers, plus potentially some leftover type.
    Register Dst = MI.getOperand(0).getReg();
    LLT ResTy = MRI.getType(Dst);
    if (ICmpInst::isEquality(Pred)) {
      // For each part on the LHS and RHS, keep track of the result of XOR-ing
      // them together. For each equal part, the result should be all 0s. For
      // each non-equal part, we'll get at least one 1.
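      // e.g. (illustrative) an s64 equality compare split into s32 halves:
      //   %xlo:_(s32) = G_XOR %lhs_lo, %rhs_lo
      //   %xhi:_(s32) = G_XOR %lhs_hi, %rhs_hi
      //   %dst:_(s1) = G_ICMP eq, (G_OR %xlo, %xhi), 0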
      auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
      SmallVector<Register, 4> Xors;
      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
        Xors.push_back(Xor);
      }

      // Build a G_XOR for each leftover register. Each G_XOR must be widened
      // to the desired narrow type so that we can OR them together later.
      SmallVector<Register, 4> WidenedXors;
      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                            /* PadStrategy = */ TargetOpcode::G_ZEXT);
        Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
      }

      // Now, for each part we broke up, we know if they are equal/not equal
      // based off the G_XOR. We can OR these all together and compare against
      // 0 to get the result.
      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
      auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
      for (unsigned I = 2, E = Xors.size(); I < E; ++I)
        Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
      MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
    } else {
      // TODO: Handle non-power-of-two types.
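      // For the ordered case, compare the high halves first; only when they
      // are equal does the unsigned comparison of the low halves decide the
      // result, i.e. (illustrative) for a signed s64 < split into s32 halves:
      //   select(hi_eq, lo_ult, hi_slt)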
      assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
      assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
      Register LHSL = LHSPartRegs[0];
      Register LHSH = LHSPartRegs[1];
      Register RHSL = RHSPartRegs[0];
      Register RHSH = RHSPartRegs[1];
      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpHEQ =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
      MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    int64_t SizeInBits = MI.getOperand(2).getImm();

    // So long as the new type has more bits than the bits we're extending we
    // don't need to break it apart.
    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
      Observer.changingInstr(MI);
      // We don't lose any non-extension bits by truncating the src and
      // sign-extending the dst.
      MachineOperand &MO1 = MI.getOperand(1);
      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
      MO1.setReg(TruncMIB.getReg(0));

      MachineOperand &MO2 = MI.getOperand(0);
      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
      MIRBuilder.buildSExt(MO2, DstExt);
      MO2.setReg(DstExt);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Break it apart. Components below the extension point are unmodified. The
    // component containing the extension point becomes a narrower SEXT_INREG.
    // Components above it are ashr'd from the component containing the
    // extension point.
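    // For example (illustrative), an s96 = G_SEXT_INREG %x, 40 narrowed with
    // s32: part 0 (bits 0-31) passes through unchanged, part 1 (bits 32-63)
    // becomes G_SEXT_INREG %x1, 8, and part 2 (bits 64-95) becomes a G_ASHR
    // of part 1 by 31, i.e. a broadcast of the sign bit.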
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<Register, 2> DstRegs;
    // List the registers where the source will be split.
    SmallVector<Register, 2> SrcRegs;

    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
      SrcRegs.push_back(SrcReg);
    }

    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));

    Register AshrCstReg =
        MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
            .getReg(0);
    Register FullExtensionReg = 0;
    Register PartialExtensionReg = 0;

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i) {
      if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
        DstRegs.push_back(SrcRegs[i]);
      else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          DstRegs.push_back(FullExtensionReg);
          continue;
        }
        DstRegs.push_back(
            MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
                .getReg(0));
        FullExtensionReg = DstRegs.back();
      } else {
        DstRegs.push_back(
            MIRBuilder
                .buildInstr(
                    TargetOpcode::G_SEXT_INREG, {NarrowTy},
                    {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
                .getReg(0));
        PartialExtensionReg = DstRegs.back();
      }
    }

    // Gather the destination registers into the final destination.
    Register DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    // Byte-/bit-swap each narrow part, then reassemble the parts in reverse
    // order.
    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }

    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);

    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:
    return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_FPEXT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}
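// Coerce a value to an equivalently sized scalar, e.g. (illustrative) p0 ->
// s64 via G_PTRTOINT, or <2 x s32> -> s64 via G_BITCAST (vectors of pointers
// go through G_PTRTOINT first). Returns an invalid Register for pointers in
// non-integral address spaces, which must not be reinterpreted as integers.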
Register LegalizerHelper::coerceToScalar(Register Val) {
  LLT Ty = MRI.getType(Val);
  if (Ty.isScalar())
    return Val;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
  if (Ty.isPointer()) {
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      return Register();
    return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
  }

  Register NewVal = Val;

  assert(Ty.isVector());
  LLT EltTy = Ty.getElementType();
  if (EltTy.isPointer())
    NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
}
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
  MO.setReg(ExtB.getReg(0));
}
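// Give the dst operand a fresh WideTy vreg and truncate it back to the
// original type right after MI, e.g. (illustrative):
//   %dst:_(s16) = G_FOO ...
// becomes:
//   %wide:_(s32) = G_FOO ...
//   %dst:_(s16) = G_TRUNC %wide
// The insert point is bumped past MI so the truncate lands after it.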
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
  MO.setReg(DstExt);
}

void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
  MO.setReg(DstTrunc);
}

void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  Register Dst = MO.getReg();
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MO.setReg(DstExt);
  MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
}

void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
}
void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &Op = MI.getOperand(OpIdx);
  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
}

void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildBitcast(MO, CastDst);
  MO.setReg(CastDst);
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
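    // e.g. (illustrative) s16 = G_MERGE_VALUES %a:_(s8), %b:_(s8) widened to
    // s32: zero-extend both parts to s32, shift %b left by 8, OR the two
    // together, then truncate the s32 result back down to s16.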
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = std::gcd(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    Register SrcReg = MO.getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge =
        MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide
  // the original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts.
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type.
    unsigned DstSize = DstTy.getSizeInBits();

    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since we
  // may have widened the source, we will need to pad the results with dead
  // defs to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  //  %4:_(s192) = G_ANYEXT %0:_(s96)
  //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  ; unpack to GCD type, with extra dead defs
  //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  //  dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
  //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
  //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, DstTy);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();

  // Directly unmerge to the destination without going through a GCD type
  // if possible.
  if (PartsPerRemerge == 1) {
    const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else {
          // Create dead def for excess components.
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
        }
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    auto LShr = MIRBuilder.buildLShr(
        ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0 || WideTy.isVector())
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  unsigned Opcode;
  unsigned ExtOpcode;
  std::optional<Register> CarryIn;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }

  if (TypeIdx == 1) {
    unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);

    Observer.changingInstr(MI);
    if (CarryIn)
      widenScalarSrc(MI, WideTy, 4, BoolExtOp);
    widenScalarDst(MI, WideTy, 1);

    Observer.changedInstr(MI);
    return Legalized;
  }

  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
  // Do the arithmetic in the larger type.
  Register NewOp;
  if (CarryIn) {
    LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
    NewOp = MIRBuilder
                .buildInstr(Opcode, {WideTy, CarryOutTy},
                            {LHSExt, RHSExt, *CarryIn})
                .getReg(0);
  } else {
    NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
  }
  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
  // There is no overflow if the ExtOp is the same as NewOp.
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
  // Now trunc the NewOp to the original result.
  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;

  // We can convert this to:
  //   1. Any extend iN to iM
  //   2. SHL by M-N
  //   3. [US][ADD|SUB|SHL]SAT
  //   4. L/ASHR by M-N
  //
  // It may be more efficient to lower this to a min and a max operation in
  // the higher precision arithmetic if the promoted operation isn't legal,
  // but this decision is up to the target's lowering request.
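  // For example (illustrative), an s8 G_SADDSAT widened to s32 becomes:
  //   %a32 = G_ANYEXT %a                                    ; step 1
  //   %b32 = G_ANYEXT %b
  //   %sat = G_SADDSAT (G_SHL %a32, 24), (G_SHL %b32, 24)   ; steps 2-3
  //   %res = G_TRUNC (G_ASHR %sat, 24)                      ; step 4
  // The shifts place the operands in the top bits, so saturation in s32
  // clips at exactly the s8 bounds.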
  Register DstReg = MI.getOperand(0).getReg();
  unsigned NewBits = WideTy.getScalarSizeInBits();
  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();

  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
  // must not left shift the RHS to preserve the shift amount.
  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
                     : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);

  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
                                        {ShiftL, ShiftR}, MI.getFlags());

  // Use a shift that will preserve the number of sign bits when the trunc is
  // folded away.
  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
                         : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);

  MIRBuilder.buildTrunc(DstReg, Result);
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  }

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
  Register Result = MI.getOperand(0).getReg();
  Register OriginalOverflow = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(LHS);
  LLT OverflowTy = MRI.getType(OriginalOverflow);
  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();

  // To determine if the result overflowed in the larger type, we extend the
  // input to the larger type, do the multiply (checking if it overflows),
  // then also check the high bits of the result to see if overflow happened
  // there.
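  // For example (illustrative), an s16 G_UMULO widened to s32: multiply the
  // zero-extended operands in s32, then overflow occurred iff the s32 product
  // no longer zero-extends its low 16 bits (i.e. the high half is non-zero).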
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});

  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
                                    {LeftOperand, RightOperand});
  auto Mul = Mulo->getOperand(0);
  MIRBuilder.buildTrunc(Result, Mul);

  MachineInstrBuilder ExtResult;
  // Overflow occurred if it occurred in the larger type, or if the high part
  // of the result does not zero/sign-extend the low part. Check this second
  // possibility first.
  if (IsSigned) {
    // For signed, overflow occurred when the high part does not sign-extend
    // the low part.
    ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
  } else {
    // Unsigned overflow occurred when the high part does not zero-extend the
    // low part.
    ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
  }

  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
  // so we don't need to check the overflow result of larger type Mulo.
  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
    auto Overflow =
        MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
    // Finally check if the multiplication in the larger type itself overflowed.
    MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
  } else {
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
  }
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
    assert(TypeIdx == 0 && "atomicrmw with second scalar type");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }
    assert(TypeIdx == 1 &&
           "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First extend the input.
    unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
                              MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
                          ? TargetOpcode::G_ANYEXT
                          : TargetOpcode::G_ZEXT;
    auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
    LLT CurTy = MRI.getType(SrcReg);
    unsigned NewOpc = MI.getOpcode();
    if (NewOpc == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
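      // e.g. (illustrative) widening an s8 G_CTTZ to s32: OR the extended
      // source with 0x100, so an all-zero s8 input still yields cttz == 8.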
  1876. auto TopBit =
  1877. APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
  1878. MIBSrc = MIRBuilder.buildOr(
  1879. WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
  1880. // Now we know the operand is non-zero, use the more relaxed opcode.
  1881. NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
  1882. }
  1883. // Perform the operation at the larger size.
  1884. auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
  1885. // This is already the correct result for CTPOP and CTTZs
  1886. if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
  1887. MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
  1888. // The correct result is NewOp - (Difference in widety and current ty).
  1889. unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
  1890. MIBNewOp = MIRBuilder.buildSub(
  1891. WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
  1892. }
  1893. MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
  1894. MI.eraseFromParent();
  1895. return Legalized;
  1896. }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
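  // As a sketch of why the post-shift is needed: byte-swapping an s16 value
  // 0xAABB inside s32 gives G_BSWAP(0x????AABB) = 0xBBAA????, leaving the
  // interesting bytes in the high half; shifting right by DiffBits (16) and
  // truncating recovers 0xBBAA. G_BITREVERSE below needs the identical
  // correction, since the reversed bits likewise land at the top of WideTy.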
  case TargetOpcode::G_BITREVERSE: {
    Observer.changingInstr(MI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
    MIRBuilder.buildTrunc(DstReg, Shift);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FREEZE:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ABS:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform the operation at the larger width (any extension is fine here,
    // high bits don't affect the result) and then truncate the result back to
    // the original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
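  // To see why the shift amount is zero-extended rather than any-extended: an
  // s8 amount of 7 must stay 7 in the wide type. G_ANYEXT leaves the high
  // bits unspecified, so the widened amount could appear as, say, 0x107 and
  // turn an in-range shift into an out-of-range one at the new width. G_ZEXT
  // preserves the numeric value exactly.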
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIVREM:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR
                           ? TargetOpcode::G_SEXT
                           : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UDIVREM:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform the operation at the larger width (any extension is fine
      // here, high bits don't affect the result) and then truncate the result
      // back to the original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!Ty.isScalar())
      return UnableToLegalize;

    Observer.changingInstr(MI);

    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
        TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
           "Illegal Extend");

    const APInt &SrcVal = SrcMO.getCImm()->getValue();
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
                           ? SrcVal.sext(WideTy.getSizeInBits())
                           : SrcVal.zext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
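  // As a concrete instance: widening %c:_(s8) = G_CONSTANT i8 -1 to s32 with
  // a target that reports G_SEXT here rewrites the immediate to i32 -1
  // (0xFFFFFFFF), while G_ZEXT would produce i32 255. Either way, users of
  // the original s8 value see the same low bits after the truncation that
  // widenScalarDst inserts.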
  case TargetOpcode::G_FCONSTANT: {
    // To avoid changing the bits of the constant due to extension to a larger
    // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
    MachineOperand &SrcMO = MI.getOperand(1);
    APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
    MIRBuilder.setInstrAndDebugLoc(MI);
    auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
    widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;
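  // The extension must match the predicate's signedness to keep the compare's
  // outcome. For an s8 operand 0x80: as a signed value (-128) it must widen
  // to 0xFFFFFF80 via G_SEXT, while as an unsigned value (128) it must widen
  // to 0x00000080 via G_ZEXT; mixing these up flips the result for predicates
  // like SLT vs. ULT. Equality predicates are insensitive to the choice, as
  // long as both operands use the same extension.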
  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(
          MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
          TargetOpcode::G_ANYEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // TODO: Probably should be zext
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx == 1) {
      Observer.changingInstr(MI);

      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);

      widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideVecTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx == 2) {
      Observer.changingInstr(MI);
      // TODO: Probably should be zext
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
      Observer.changedInstr(MI);
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FPOWI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  }
}
static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
                             MachineIRBuilder &B, Register Src, LLT Ty) {
  auto Unmerge = B.buildUnmerge(Ty, Src);
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    Pieces.push_back(Unmerge.getReg(I));
}
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    SmallVector<Register, 8> SrcRegs;

    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type.

      // If there's an element size mismatch, insert intermediate casts to
      // match the result element type.
      if (NumSrcElt < NumDstElt) { // Source element type is larger.
        // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
        //
        // =>
        //
        // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
        // %4:_(<2 x s8>) = G_BITCAST %2
        // %5:_(<2 x s8>) = G_BITCAST %3
        // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
        DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
        //
        // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
        //
        // =>
        //
        // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
        // %4:_(s16) = G_BITCAST %2
        // %5:_(s16) = G_BITCAST %3
        // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
        SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
        DstCastTy = DstEltTy;
      }

      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
      for (Register &SrcReg : SrcRegs)
        SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
    } else
      getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);

    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isVector()) {
    SmallVector<Register, 8> SrcRegs;
    getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
/// Figure out the bit offset into a register when coercing a vector index for
/// the wide element type. This is only for the case when promoting a vector
/// to one with larger elements.
///
///   %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
///   %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
                                                   Register Idx,
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  // Now figure out the amount we need to shift to get the target bits.
  auto OffsetMask = B.buildConstant(
      IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
}
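// Worked instance of the math above: with OldEltSize = 16 and NewEltSize =
// 32, Log2EltRatio is 1, so the mask ~(-1 << 1) = 1 selects the sub-element
// position (%idx & 1), and shifting that left by Log2(16) = 4 yields a bit
// offset of 0 or 16 within the wide s32 element.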
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
/// is casting to a vector with a smaller element size, perform multiple element
/// extracts and merge the results. If this is coercing to a vector with larger
/// elements, index the bitcasted vector and extract the target element with bit
/// operations. This is intended to force the indexing in the native register
/// size for architectures that can dynamically index the register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(2).getReg();
  LLT SrcVecTy = MRI.getType(SrcVec);
  LLT IdxTy = MRI.getType(Idx);

  LLT SrcEltTy = SrcVecTy.getElementType();
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1))))
    //
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy =
        LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt =
          MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //   =>
    //
    // %cast = G_BITCAST %vec
    // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    // %elt_bits = G_LSHR %wide_elt, %offset_bits
    // %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
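// End to end, extracting element 5 from %vec:_(<8 x s16>) through a v4s32
// cast would go: %scaled_idx = 5 >> 1 = 2, %wide_elt = element 2 of the
// bitcasted <4 x s32>, %offset_bits = (5 & 1) << 4 = 16, and the final
// G_LSHR/G_TRUNC pair returns bits [31:16] of that wide element as s16.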
/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits, while
/// preserving the other bits in \p TargetReg:
///
/// (ZExt(InsertReg) << OffsetBits) |
///     (TargetReg & ~(LowBitsMask(InsertReg.size()) << OffsetBits))
static Register buildBitFieldInsert(MachineIRBuilder &B,
                                    Register TargetReg, Register InsertReg,
                                    Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert
  auto EltMask = B.buildConstant(
      TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                     InsertTy.getSizeInBits()));
  // Shift it into position
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  // The value to insert has all zeros already, so stick it into the masked
  // wide element.
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}
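// For instance, inserting an s8 value into an s32 target at OffsetBits = 8:
// EltMask = 0xFF, ShiftedMask = 0xFF00, and the result is
// (Target & ~0xFF00) | (ZExt(Insert) << 8), leaving bytes 0, 2, and 3 of the
// target untouched.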
/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file and
/// want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Val = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(3).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT IdxTy = MRI.getType(Idx);

  LLT VecEltTy = VecTy.getElementType();
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      InsertedElt = MIRBuilder.buildInsertVectorElement(
          CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = LoadMI.getDstReg();
  Register PtrReg = LoadMI.getPointerReg();
  LLT DstTy = MRI.getType(DstReg);
  MachineMemOperand &MMO = LoadMI.getMMO();
  LLT MemTy = MMO.getMemoryType();
  MachineFunction &MF = MIRBuilder.getMF();

  unsigned MemSizeInBits = MemTy.getSizeInBits();
  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();

  if (MemSizeInBits != MemStoreSizeInBits) {
    if (MemTy.isVector())
      return UnableToLegalize;

    // Promote to a byte-sized load if not loading an integral number of
    // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
    LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);

    Register LoadReg = DstReg;
    LLT LoadTy = DstTy;

    // If this wasn't already an extending load, we need to widen the result
    // register to avoid creating a load with a narrower result than the
    // source.
    if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
      LoadTy = WideMemTy;
      LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
    }

    if (isa<GSExtLoad>(LoadMI)) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
    } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      // The extra bits are guaranteed to be zero, since we stored them that
      // way. A zext load from Wide thus automatically gives zext from MemVT.
      MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
    } else {
      MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
    }

    if (DstTy != LoadTy)
      MIRBuilder.buildTrunc(DstReg, LoadReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }
  // Big endian lowering not implemented.
  if (MIRBuilder.getDataLayout().isBigEndian())
    return UnableToLegalize;

  // This load needs splitting into power of 2 sized loads.
  //
  // Our strategy here is to generate anyextending loads for the smaller
  // types up to next power-2 result type, and then combine the two larger
  // result values together, before truncating back down to the non-pow-2
  // type.
  // E.g. v1 = i24 load =>
  // v2 = i32 zextload (2 byte)
  // v3 = i32 load (1 byte)
  // v4 = i32 shl v3, 16
  // v5 = i32 or v4, v2
  // v1 = i24 trunc v5
  // By doing this we generate the correct truncate which should get
  // combined away as an artifact with a matching extend.
  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    // This load needs splitting into power of 2 sized loads.
    LargeSplitSize = PowerOf2Floor(MemSizeInBits);
    SmallSplitSize = MemSizeInBits - LargeSplitSize;
  } else {
    // This is already a power of 2, but we still need to split this in half.
    //
    // Assume we're being asked to decompose an unaligned load.
    // TODO: If this requires multiple splits, handle them all at once.
    auto &Ctx = MF.getFunction().getContext();
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize;

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }
  if (MemTy.isVector()) {
    // TODO: Handle vector extloads
    if (MemTy != DstTy)
      return UnableToLegalize;

    // TODO: We can do better than scalarizing the vector and at least split it
    // in half.
    return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
  }

  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);

  LLT PtrTy = MRI.getType(PtrReg);
  unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
  LLT AnyExtTy = LLT::scalar(AnyExtSize);
  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
                                             PtrReg, *LargeMMO);

  auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
                                            LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
                                             SmallPtr, *SmallMMO);

  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);

  if (AnyExtTy == DstTy)
    MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
    MIRBuilder.buildTrunc(DstReg, {Or});
  } else {
    assert(DstTy.isPointer() && "expected pointer");
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);

    // FIXME: We currently consider this to be illegal for non-integral address
    // spaces, but we still need a way to reinterpret the bits.
    MIRBuilder.buildIntToPtr(DstReg, Or);
  }

  LoadMI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
  // Lower a non-power of 2 store into multiple pow-2 stores.
  // E.g. split an i24 store into an i16 store + i8 store.
  // We do this by first extending the stored value to the next largest power
  // of 2 type, and then using truncating stores to store the components.
  // By doing this, likewise with G_LOAD, generate an extend that can be
  // artifact-combined away instead of leaving behind extracts.
  Register SrcReg = StoreMI.getValueReg();
  Register PtrReg = StoreMI.getPointerReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
  LLT MemTy = MMO.getMemoryType();

  unsigned StoreWidth = MemTy.getSizeInBits();
  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();

  if (StoreWidth != StoreSizeInBits) {
    if (SrcTy.isVector())
      return UnableToLegalize;

    // Promote to a byte-sized store with upper bits zero if not
    // storing an integral number of bytes. For example, promote
    // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
    LLT WideTy = LLT::scalar(StoreSizeInBits);

    if (StoreSizeInBits > SrcTy.getSizeInBits()) {
      // Avoid creating a store with a narrower source than result.
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
      SrcTy = WideTy;
    }

    auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);

    MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
    MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
    StoreMI.eraseFromParent();
    return Legalized;
  }

  if (MemTy.isVector()) {
    // TODO: Handle vector trunc stores
    if (MemTy != SrcTy)
      return UnableToLegalize;

    // TODO: We can do better than scalarizing the vector and at least split it
    // in half.
    return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
  }
  unsigned MemSizeInBits = MemTy.getSizeInBits();
  uint64_t LargeSplitSize, SmallSplitSize;

  if (!isPowerOf2_32(MemSizeInBits)) {
    LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
    SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
  } else {
    auto &Ctx = MF.getFunction().getContext();
    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
      return UnableToLegalize; // Don't know what we're being asked to do.

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }

  // Extend to the next pow-2. If this store was itself the result of lowering,
  // e.g. an s56 store being broken into s32 + s24, we might have a stored type
  // that's wider than the stored size.
  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
  const LLT NewSrcTy = LLT::scalar(AnyExtSize);

  if (SrcTy.isPointer()) {
    const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
    SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
  }

  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);

  // Obtain the smaller value by shifting away the larger value.
  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);

  // Generate the PtrAdd and truncating stores.
  LLT PtrTy = MRI.getType(PtrReg);
  auto OffsetCst = MIRBuilder.buildConstant(
      LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
  auto SmallPtr = MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);

  MachineMemOperand *LargeMMO =
      MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
  StoreMI.eraseFromParent();
  return Legalized;
}
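// Concretely, an s24 store splits with LargeSplitSize = 16 and SmallSplitSize
// = 8, producing MIR roughly like (register names illustrative):
//   %ext:_(s32) = G_ANYEXT %val:_(s24)
//   %c16:_(s32) = G_CONSTANT i32 16
//   %hi:_(s32)  = G_LSHR %ext, %c16
//   G_STORE %ext, %ptr             ; 2-byte truncating store
//   %c2:_(s64)  = G_CONSTANT i64 2
//   %p2:_(p0)   = G_PTR_ADD %ptr, %c2
//   G_STORE %hi, %p2               ; 1-byte truncating store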
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    MachineMemOperand &MMO = **MI.memoperands_begin();

    // Not sure how to interpret a bitcast of an extending load.
    if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastDst(MI, CastTy, 0);
    MMO.setType(CastTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    MachineMemOperand &MMO = **MI.memoperands_begin();

    // Not sure how to interpret a bitcast of a truncating store.
    if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 0);
    MMO.setType(CastTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SELECT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");
      return UnableToLegalize;
    }

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 2);
    bitcastSrc(MI, CastTy, 3);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 1);
    bitcastSrc(MI, CastTy, 2);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
  default:
    return UnableToLegalize;
  }
}
// Legalize an instruction by changing the opcode in place.
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  Observer.changingInstr(MI);
  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
  Observer.changedInstr(MI);
}
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
  using namespace TargetOpcode;

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_BITCAST:
    return lowerBitcast(MI);
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    auto Quot =
        MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
                              {MI.getOperand(1), MI.getOperand(2)});

    auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
    MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
    return lowerSADDO_SSUBO(MI);
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SMULH:
    return lowerSMULH_UMULH(MI);
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for
    // the result.
    Register Res = MI.getOperand(0).getReg();
    Register Overflow = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    LLT Ty = MRI.getType(Res);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    Observer.changingInstr(MI);
    const auto &TII = MIRBuilder.getTII();
    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.removeOperand(1);
    Observer.changedInstr(MI);

    auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
    auto Zero = MIRBuilder.buildConstant(Ty, 0);

    // Move insert point forward so we can use the Res register if needed.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // For *signed* multiply, overflow is detected by checking:
    // (hi != (lo >> bitwidth-1))
    if (Opcode == TargetOpcode::G_SMULH) {
      auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
      auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    Register Res = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Res);

    // TODO: Handle vector types once we are able to
    // represent them.
    if (Ty.isVector())
      return UnableToLegalize;
    auto SignMask =
        MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
    Register SubByReg = MI.getOperand(1).getReg();
    MIRBuilder.buildXor(Res, SubByReg, SignMask);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {
    Register Res = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Res);

    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // First, check if G_FNEG is marked as Lower. If so, we may
    // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    auto Neg = MIRBuilder.buildFNeg(Ty, RHS);

    if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
      MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
    else
      MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FMAD:
    return lowerFMad(MI);
  case TargetOpcode::G_FFLOOR:
    return lowerFFloor(MI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return lowerIntrinsicRound(MI);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    // Since round even is the assumed rounding mode for unconstrained FP
    // operations, rint and roundeven are the same operation.
    changeOpcode(MI, TargetOpcode::G_FRINT);
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
    Register Addr = MI.getOperand(2).getReg();
    Register CmpVal = MI.getOperand(3).getReg();
    Register NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    return lowerLoad(cast<GAnyLoad>(MI));
  case TargetOpcode::G_STORE:
    return lowerStore(cast<GStore>(MI));
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI);
  case G_UADDO: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
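  // An unsigned add wraps exactly when the result is smaller than either
  // operand, so a single compare against RHS suffices. E.g. for s8,
  // 250 + 10 = 4 (mod 256) and 4 <u 10, so CarryOut = 1.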
  case G_UADDE: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register CarryIn = MI.getOperand(4).getReg();
    const LLT CondTy = MRI.getType(CarryOut);
    const LLT Ty = MRI.getType(Res);

    auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
    auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);

    // The carry is set if either add wrapped; at most one of them can wrap,
    // so an OR of the two compares is the carry-out. Comparing only Res
    // against LHS would miss the case where RHS is all-ones and CarryIn is 1,
    // since then Res == LHS even though a carry occurred.
    auto Carry1 = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
    auto Carry2 = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, Res, TmpRes);
    MIRBuilder.buildOr(CarryOut, Carry1, Carry2);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();
    const LLT CondTy = MRI.getType(BorrowOut);
    const LLT Ty = MRI.getType(Res);

    auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
    auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);

    auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
    auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
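  // The select encodes the borrow-out logic: when LHS == RHS the difference
  // is zero and subtracting the borrow-in decides the outcome (borrow-out
  // equals borrow-in), while for LHS != RHS a borrow-in of at most 1 cannot
  // change whether a borrow happens, so LHS <u RHS is the answer. E.g. for
  // s8, 5 - 5 - 1 wraps to 0xFF with borrow-out 1 == borrow-in.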
  case G_UITOFP:
    return lowerUITOFP(MI);
  case G_SITOFP:
    return lowerSITOFP(MI);
  case G_FPTOUI:
    return lowerFPTOUI(MI);
  case G_FPTOSI:
    return lowerFPTOSI(MI);
  case G_FPTRUNC:
    return lowerFPTRUNC(MI);
  case G_FPOWI:
    return lowerFPOWI(MI);
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
    return lowerMinMax(MI);
  case G_FCOPYSIGN:
    return lowerFCopySign(MI);
  case G_FMINNUM:
  case G_FMAXNUM:
    return lowerFMinNumMaxNum(MI);
  case G_MERGE_VALUES:
    return lowerMergeValues(MI);
  case G_UNMERGE_VALUES:
    return lowerUnmergeValues(MI);
  case TargetOpcode::G_SEXT_INREG: {
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    Register TmpRes = MRI.createGenericVirtualRegister(DstTy);

    auto MIBSz = MIRBuilder.buildConstant(
        DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
    MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
    MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
    MI.eraseFromParent();
    return Legalized;
  }
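  // The shl/ashr pair is the classic sign-extension idiom: for a G_SEXT_INREG
  // of 8 bits in an s32, the constant is 24, so 0x000000FF becomes 0xFF000000
  // after the shift left and 0xFFFFFFFF after the arithmetic shift right,
  // replicating bit 7 across the high bits.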
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return lowerExtractInsertVectorElt(MI);
  case G_SHUFFLE_VECTOR:
    return lowerShuffleVector(MI);
  case G_DYN_STACKALLOC:
    return lowerDynStackAlloc(MI);
  case G_EXTRACT:
    return lowerExtract(MI);
  case G_INSERT:
    return lowerInsert(MI);
  case G_BSWAP:
    return lowerBswap(MI);
  case G_BITREVERSE:
    return lowerBitreverse(MI);
  case G_READ_REGISTER:
  case G_WRITE_REGISTER:
    return lowerReadWriteRegister(MI);
  case G_UADDSAT:
  case G_USUBSAT: {
    // Try to make a reasonable guess about which lowering strategy to use. The
    // target can override this with custom lowering and calling the
    // implementation functions.
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (LI.isLegalOrCustom({G_UMIN, Ty}))
      return lowerAddSubSatToMinMax(MI);
    return lowerAddSubSatToAddoSubo(MI);
  }
  case G_SADDSAT:
  case G_SSUBSAT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());

    // FIXME: It would probably make more sense to see if G_SADDO is preferred,
    // since it's a shorter expansion. However, we would need to figure out the
    // preferred boolean type for the carry out for the query.
    if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
      return lowerAddSubSatToMinMax(MI);
    return lowerAddSubSatToAddoSubo(MI);
  }
  case G_SSHLSAT:
  case G_USHLSAT:
    return lowerShlSat(MI);
  case G_ABS:
    return lowerAbsToAddXor(MI);
  case G_SELECT:
    return lowerSelect(MI);
  case G_IS_FPCLASS:
    return lowerISFPCLASS(MI);
  case G_SDIVREM:
  case G_UDIVREM:
    return lowerDIVREM(MI);
  case G_FSHL:
  case G_FSHR:
    return lowerFunnelShift(MI);
  case G_ROTL:
  case G_ROTR:
    return lowerRotate(MI);
  case G_MEMSET:
  case G_MEMCPY:
  case G_MEMMOVE:
    return lowerMemCpyFamily(MI);
  case G_MEMCPY_INLINE:
    return lowerMemcpyInline(MI);
  GISEL_VECREDUCE_CASES_NONSEQ
    return lowerVectorReduction(MI);
  }
}
Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
                                                  Align MinAlign) const {
  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
  // datalayout for the preferred alignment. Also there should be a target hook
  // for this to allow targets to reduce the alignment and ignore the
  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless
  // of the type.
  return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
}

MachineInstrBuilder
LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
                                      MachinePointerInfo &PtrInfo) {
  MachineFunction &MF = MIRBuilder.getMF();
  const DataLayout &DL = MIRBuilder.getDataLayout();
  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);

  unsigned AddrSpace = DL.getAllocaAddrSpace();
  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));

  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
}
static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
                                        LLT VecTy) {
  int64_t IdxVal;
  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
    return IdxReg;

  LLT IdxTy = B.getMRI()->getType(IdxReg);
  unsigned NElts = VecTy.getNumElements();
  if (isPowerOf2_32(NElts)) {
    APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
    return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
  }

  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
      .getReg(0);
}
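// In other words: for a power-of-2 element count such as 4 the index is
// clamped with a cheap mask (Idx & 3), and otherwise with an unsigned min
// against the last valid index (e.g. umin(Idx, 2) for a 3-element vector).
// A constant index is passed through unchanged.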
Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
                                                  Register Index) {
  LLT EltTy = VecTy.getElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltTy.getSizeInBits() &&
         "Converting bits to bytes lost precision");

  Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);

  LLT IdxTy = MRI.getType(Index);
  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
                                 MIRBuilder.buildConstant(IdxTy, EltSize));

  LLT PtrTy = MRI.getType(VecPtr);
  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
}
#ifndef NDEBUG
/// Check that all vector operands have the same number of elements. Other
/// operands should be listed in \p NonVecOpIndices.
static bool hasSameNumEltsOnAllVectorOperands(
    GenericMachineInstr &MI, MachineRegisterInfo &MRI,
    std::initializer_list<unsigned> NonVecOpIndices) {
  if (MI.getNumMemOperands() != 0)
    return false;

  LLT VecTy = MRI.getType(MI.getReg(0));
  if (!VecTy.isVector())
    return false;
  unsigned NumElts = VecTy.getNumElements();

  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
    MachineOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg()) {
      if (!is_contained(NonVecOpIndices, OpIdx))
        return false;
      continue;
    }

    LLT Ty = MRI.getType(Op.getReg());
    if (!Ty.isVector()) {
      if (!is_contained(NonVecOpIndices, OpIdx))
        return false;
      continue;
    }

    if (Ty.getNumElements() != NumElts)
      return false;
  }

  return true;
}
#endif
/// Fill \p DstOps with DstOps that have the same total number of elements as
/// \p Ty. These DstOps are either scalars (when \p NumElts = 1) or vectors
/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
/// \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
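/// Illustrative: Ty = <5 x s32> with \p NumElts = 2 yields
/// DstOps = { <2 x s32>, <2 x s32>, s32 }.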
static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
                       unsigned NumElts) {
  LLT LeftoverTy;
  assert(Ty.isVector() && "Expected vector type");
  LLT EltTy = Ty.getElementType();
  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover) =
      getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);

  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
  for (int i = 0; i < NumParts; ++i) {
    DstOps.push_back(NarrowTy);
  }

  if (LeftoverTy.isValid()) {
    assert(NumLeftover == 1 && "expected exactly one leftover");
    DstOps.push_back(LeftoverTy);
  }
}
/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
/// made from \p Op depending on operand type.
static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
                           MachineOperand &Op) {
  for (unsigned i = 0; i < N; ++i) {
    if (Op.isReg())
      Ops.push_back(Op.getReg());
    else if (Op.isImm())
      Ops.push_back(Op.getImm());
    else if (Op.isPredicate())
      Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
    else
      llvm_unreachable("Unsupported type");
  }
}
// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different
// element type.
//
// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//        <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//              s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
    GenericMachineInstr &MI, unsigned NumElts,
    std::initializer_list<unsigned> NonVecOpIndices) {
  assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
         "Non-compatible opcode or not specified non-vector operands");
  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();

  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
  // Build instructions with DstOps to use instruction found by CSE directly.
  // CSE copies found instruction into given vreg when building with vreg dest.
  SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
  // Output registers will be taken from created instructions.
  SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
  for (unsigned i = 0; i < NumDefs; ++i) {
    makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
  }

  // Split vector input operands into sub-vectors with NumElts elts + Leftover.
  // Operands listed in NonVecOpIndices will be used as-is without splitting;
  // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
  // scalar condition (op 1), immediate in sext_inreg (op 2).
  SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       ++UseIdx, ++UseNo) {
    if (is_contained(NonVecOpIndices, UseIdx)) {
      broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
                     MI.getOperand(UseIdx));
    } else {
      SmallVector<Register, 8> SplitPieces;
      extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
      for (auto Reg : SplitPieces)
        InputOpsPieces[UseNo].push_back(Reg);
    }
  }

  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;

  // Take i-th piece of each input operand split and build sub-vector/scalar
  // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    SmallVector<DstOp, 2> Defs;
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      Defs.push_back(OutputOpsPieces[DstNo][i]);

    SmallVector<SrcOp, 3> Uses;
    for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
      Uses.push_back(InputOpsPieces[InputNo][i]);

    auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      OutputRegs[DstNo].push_back(I.getReg(DstNo));
  }

  // Merge small outputs into MI's output for each def operand.
  if (NumLeftovers) {
    for (unsigned i = 0; i < NumDefs; ++i)
      mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
  } else {
    for (unsigned i = 0; i < NumDefs; ++i)
      MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
                                        unsigned NumElts) {
  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();

  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  SmallVector<DstOp, 8> OutputOpsPieces;
  SmallVector<Register, 8> OutputRegs;
  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);

  // Instructions that perform the register split will be inserted in the basic
  // block where the register is defined (the basic block is given by the next
  // PHI operand).
  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {
    MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
    extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
  }

  // Build PHIs with fewer elements.
  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
  MIRBuilder.setInsertPt(*MI.getParent(), MI);
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
    Phi.addDef(
        MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
    OutputRegs.push_back(Phi.getReg(0));

    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      Phi.add(MI.getOperand(1 + j * 2 + 1));
    }
  }

  // Merge small outputs into MI's def.
  if (NumLeftovers) {
    mergeMixedSubvectors(MI.getReg(0), OutputRegs);
  } else {
    MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  LLT SrcTy = MRI.getType(SrcReg);

  if (TypeIdx != 1 || NarrowTy == DstTy)
    return UnableToLegalize;

  // Requires compatible types. Otherwise SrcReg should have been defined by a
  // merge-like instruction that would have been artifact-combined. Most likely
  // the instruction that defines SrcReg has to perform a more/fewer-elements
  // legalization compatible with NarrowTy.
  assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");

  if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
      (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
    return UnableToLegalize;

  // This is most likely DstTy (smaller than register size) packed in SrcTy
  // (larger than register size), and since the unmerge was not combined it
  // will be lowered to bit sequence extracts from a register. Unpack SrcTy to
  // NarrowTy (register size) pieces first, then unpack each NarrowTy piece to
  // DstTy.

  // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
  //
  // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
  // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
  // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  // Requires compatible types. Otherwise the user of DstReg did not perform an
  // unmerge that should have been artifact-combined. Most likely the
  // instruction that uses DstReg has to do a more/fewer-elements legalization
  // compatible with NarrowTy.
  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
  assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");

  if (NarrowTy == SrcTy)
    return UnableToLegalize;

  // This attempts to lower part of an LCMTy merge/unmerge sequence. The
  // intended use is for old MIR tests. Since the changes to more/fewer-elements
  // legalization, it should no longer be possible to generate MIR like this
  // when starting from llvm-ir, because the LCMTy approach was replaced with
  // merge/unmerge to vector elements.
  if (TypeIdx == 1) {
    assert(SrcTy.isVector() && "Expected vector types");
    assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
    if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
        (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
      return UnableToLegalize;
    // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
    //
    // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
    // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
    // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
    // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
    // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
    // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11

    SmallVector<Register, 8> Elts;
    LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
    for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
      auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
      for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
        Elts.push_back(Unmerge.getReg(j));
    }

    SmallVector<Register, 8> NarrowTyElts;
    unsigned NumNarrowTyElts = NarrowTy.getNumElements();
    unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
    for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
         ++i, Offset += NumNarrowTyElts) {
      ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
      NarrowTyElts.push_back(
          MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
    }

    MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
    MI.eraseFromParent();
    return Legalized;
  }

  assert(TypeIdx == 0 && "Bad type index");
  if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
      (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
    return UnableToLegalize;

  // This is most likely SrcTy (smaller than register size) packed in DstTy
  // (larger than register size), and since the merge was not combined it will
  // be lowered to bit sequence packing into a register. Merge SrcTy to
  // NarrowTy (register size) pieces first, then merge each NarrowTy piece to
  // DstTy.

  // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
  //
  // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
  // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
  // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
  SmallVector<Register, 8> NarrowTyElts;
  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
  unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
  unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
  for (unsigned i = 0; i < NumParts; ++i) {
    SmallVector<Register, 8> Sources;
    for (unsigned j = 0; j < NumElts; ++j)
      Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
    NarrowTyElts.push_back(
        MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
  }

  MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
  if (MaybeCst) {
    IdxVal = MaybeCst->Value.getSExtValue();
    // Avoid out of bounds indexing the pieces.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type,
  // so we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractInsertVectorElt(MI);
}
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (LdStMI.isAtomic())
    return UnableToLegalize;

  bool IsLoad = isa<GLoad>(LdStMI);
  Register ValReg = LdStMI.getReg(0);
  Register AddrReg = LdStMI.getPointerReg();
  LLT ValTy = MRI.getType(ValReg);

  // FIXME: Do we need a distinct NarrowMemory legalize action?
  if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) =
        getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  LLT PtrTy = MRI.getType(AddrReg);
  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each element
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
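  // Illustrative: narrowing an s96 access with NarrowTy = s64 leaves an s32
  // leftover; on a little-endian target this becomes an s64 access at byte
  // offset 0 followed by an s32 access at byte offset 8.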
  bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
  auto MMO = LdStMI.getMMO();
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned NumParts, unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         ++Idx) {
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      MachineMemOperand *NewMMO =
          MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
      Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
    }

    return Offset;
  };

  unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
  unsigned HandledOffset =
      splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);

  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  LdStMI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;
  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
  unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_PTR_ADD:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SDIVREM:
  case G_UDIVREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_ABS:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
  case G_FSHL:
  case G_FSHR:
  case G_ROTL:
  case G_ROTR:
  case G_FREEZE:
  case G_SADDSAT:
  case G_SSUBSAT:
  case G_UADDSAT:
  case G_USUBSAT:
  case G_UMULO:
  case G_SMULO:
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_SSHLSAT:
  case G_USHLSAT:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
  case G_UADDO:
  case G_USUBO:
  case G_UADDE:
  case G_USUBE:
  case G_SADDO:
  case G_SSUBO:
  case G_SADDE:
  case G_SSUBE:
  case G_STRICT_FADD:
  case G_STRICT_FSUB:
  case G_STRICT_FMUL:
  case G_STRICT_FMA:
    return fewerElementsVectorMultiEltType(GMI, NumElts);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
  case G_IS_FPCLASS:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
  case G_SELECT:
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return fewerElementsVectorMultiEltType(GMI, NumElts);
    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
  case G_PHI:
    return fewerElementsVectorPhi(GMI, NumElts);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    if (TypeIdx != 1) // TODO: This probably does work as expected already.
      return UnableToLegalize;
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
  GISEL_VECREDUCE_CASES_NONSEQ
    return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
  case G_SHUFFLE_VECTOR:
    return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  LLT DstTy = MRI.getType(DstReg);
  LLT Src1Ty = MRI.getType(Src1Reg);
  LLT Src2Ty = MRI.getType(Src2Reg);
  // The shuffle should be canonicalized by now.
  if (DstTy != Src1Ty)
    return UnableToLegalize;
  if (DstTy != Src2Ty)
    return UnableToLegalize;

  if (!isPowerOf2_32(DstTy.getNumElements()))
    return UnableToLegalize;

  // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
  // Further legalization attempts will be needed to split further.
  NarrowTy =
      DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
  unsigned NewElts = NarrowTy.getNumElements();

  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
                        SplitSrc2Regs[1]};
  Register Hi, Lo;

  // If Lo or Hi uses elements from at most two of the four input vectors, then
  // express it as a vector shuffle of those two inputs. Otherwise extract the
  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
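  // Illustrative: splitting an <8 x s32> shuffle yields two <4 x s32>
  // results; each result either becomes a shuffle of (at most) two of the
  // four <4 x s32> input halves, or falls back to a G_BUILD_VECTOR when more
  // than two halves are referenced.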
  SmallVector<int, 16> Ops;
  for (unsigned High = 0; High < 2; ++High) {
    Register &Output = High ? Hi : Lo;

    // Build a shuffle mask for the output, discovering on the fly which
    // input vectors to use as shuffle operands (recorded in InputUsed).
    // If building a suitable shuffle vector proves too hard, then bail
    // out with useBuildVector set.
    unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      // The mask element. This indexes into the input.
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      // The input vector this mask element indexes into.
      unsigned Input = (unsigned)Idx / NewElts;

      if (Input >= std::size(Inputs)) {
        // The mask element does not index into any input vector.
        Ops.push_back(-1);
        continue;
      }

      // Turn the index into an offset from the start of the input vector.
      Idx -= Input * NewElts;

      // Find or create a shuffle vector operand to hold this input.
      unsigned OpNo;
      for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
          // This input vector is already an operand.
          break;
        } else if (InputUsed[OpNo] == -1U) {
          // Create a new operand for this input vector.
          InputUsed[OpNo] = Input;
          break;
        }
      }

      if (OpNo >= std::size(InputUsed)) {
        // More than two input vectors used! Give up on trying to create a
        // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
        UseBuildVector = true;
        break;
      }

      // Add the mask index for the new shuffle vector.
      Ops.push_back(Idx + OpNo * NewElts);
    }

    if (UseBuildVector) {
      LLT EltTy = NarrowTy.getElementType();
      SmallVector<Register, 16> SVOps;

      // Extract the input elements by hand.
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        // The mask element. This indexes into the input.
        int Idx = Mask[FirstMaskIdx + MaskOffset];

        // The input vector this mask element indexes into.
        unsigned Input = (unsigned)Idx / NewElts;

        if (Input >= std::size(Inputs)) {
          // The mask element is "undef" or indexes off the end of the input.
          SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
          continue;
        }

        // Turn the index into an offset from the start of the input vector.
        Idx -= Input * NewElts;

        // Extract the vector element by hand.
        SVOps.push_back(MIRBuilder
                            .buildExtractVectorElement(
                                EltTy, Inputs[Input],
                                MIRBuilder.buildConstant(LLT::scalar(32), Idx))
                            .getReg(0));
      }

      // Construct the Lo/Hi output using a G_BUILD_VECTOR.
      Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
    } else if (InputUsed[0] == -1U) {
      // No input vectors were used! The result is undefined.
      Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
    } else {
      Register Op0 = Inputs[InputUsed[0]];
      // If only one input was used, use an undefined vector for the other.
      Register Op1 = InputUsed[1] == -1U
                         ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
                         : Inputs[InputUsed[1]];
      // At least one input vector was used. Create a new shuffle vector.
      Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
    }

    Ops.clear();
  }

  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
  MI.eraseFromParent();
  return Legalized;
}
static unsigned getScalarOpcForReduction(unsigned Opc) {
  unsigned ScalarOpc;
  switch (Opc) {
  case TargetOpcode::G_VECREDUCE_FADD:
    ScalarOpc = TargetOpcode::G_FADD;
    break;
  case TargetOpcode::G_VECREDUCE_FMUL:
    ScalarOpc = TargetOpcode::G_FMUL;
    break;
  case TargetOpcode::G_VECREDUCE_FMAX:
    ScalarOpc = TargetOpcode::G_FMAXNUM;
    break;
  case TargetOpcode::G_VECREDUCE_FMIN:
    ScalarOpc = TargetOpcode::G_FMINNUM;
    break;
  case TargetOpcode::G_VECREDUCE_ADD:
    ScalarOpc = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_VECREDUCE_MUL:
    ScalarOpc = TargetOpcode::G_MUL;
    break;
  case TargetOpcode::G_VECREDUCE_AND:
    ScalarOpc = TargetOpcode::G_AND;
    break;
  case TargetOpcode::G_VECREDUCE_OR:
    ScalarOpc = TargetOpcode::G_OR;
    break;
  case TargetOpcode::G_VECREDUCE_XOR:
    ScalarOpc = TargetOpcode::G_XOR;
    break;
  case TargetOpcode::G_VECREDUCE_SMAX:
    ScalarOpc = TargetOpcode::G_SMAX;
    break;
  case TargetOpcode::G_VECREDUCE_SMIN:
    ScalarOpc = TargetOpcode::G_SMIN;
    break;
  case TargetOpcode::G_VECREDUCE_UMAX:
    ScalarOpc = TargetOpcode::G_UMAX;
    break;
  case TargetOpcode::G_VECREDUCE_UMIN:
    ScalarOpc = TargetOpcode::G_UMIN;
    break;
  default:
    llvm_unreachable("Unhandled reduction");
  }
  return ScalarOpc;
}
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  unsigned Opc = MI.getOpcode();
  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
         Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
         "Sequential reductions not expected");

  if (TypeIdx != 1)
    return UnableToLegalize;

  // The semantics of the normal non-sequential reductions allow us to freely
  // re-associate the operation.
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  if (NarrowTy.isVector() &&
      (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
    return UnableToLegalize;

  unsigned ScalarOpc = getScalarOpcForReduction(Opc);
  SmallVector<Register> SplitSrcs;
  // If NarrowTy is a scalar then we're being asked to scalarize.
  const unsigned NumParts =
      NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
                          : SrcTy.getNumElements();

  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
  if (NarrowTy.isScalar()) {
    if (DstTy != NarrowTy)
      return UnableToLegalize; // FIXME: handle implicit extensions.

    if (isPowerOf2_32(NumParts)) {
      // Generate a tree of scalar operations to reduce the critical path.
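      // Illustrative: 8 scalar pieces are combined as 4, then 2, then 1
      // operation: three dependent levels instead of a 7-deep serial chain.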
      SmallVector<Register> PartialResults;
      unsigned NumPartsLeft = NumParts;
      while (NumPartsLeft > 1) {
        for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
          PartialResults.emplace_back(
              MIRBuilder
                  .buildInstr(ScalarOpc, {NarrowTy},
                              {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
                  .getReg(0));
        }
        SplitSrcs = PartialResults;
        PartialResults.clear();
        NumPartsLeft = SplitSrcs.size();
      }
      assert(SplitSrcs.size() == 1);
      MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
      MI.eraseFromParent();
      return Legalized;
    }
    // If we can't generate a tree, then just do sequential operations.
    Register Acc = SplitSrcs[0];
    for (unsigned Idx = 1; Idx < NumParts; ++Idx)
      Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
                .getReg(0);
    MIRBuilder.buildCopy(DstReg, Acc);
    MI.eraseFromParent();
    return Legalized;
  }
  SmallVector<Register> PartialReductions;
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    PartialReductions.push_back(
        MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
  }

  // If the types involved are powers of 2, we can generate intermediate vector
  // ops, before generating a final reduction operation.
  if (isPowerOf2_32(SrcTy.getNumElements()) &&
      isPowerOf2_32(NarrowTy.getNumElements())) {
    return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
  }

  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
      MIRBuilder.buildInstr(ScalarOpc, {DstReg},
                            {Acc, PartialReductions[Part]});
    } else {
      Acc = MIRBuilder
                .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
                .getReg(0);
    }
  }
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
                                        LLT SrcTy, LLT NarrowTy,
                                        unsigned ScalarOpc) {
  SmallVector<Register> SplitSrcs;
  // Split the sources into NarrowTy size pieces.
  extractParts(SrcReg, NarrowTy,
               SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
  // We're going to do a tree reduction using vector operations until we have
  // one NarrowTy size value left.
  while (SplitSrcs.size() > 1) {
    SmallVector<Register> PartialRdxs;
    for (unsigned Idx = 0; Idx < SplitSrcs.size() - 1; Idx += 2) {
      Register LHS = SplitSrcs[Idx];
      Register RHS = SplitSrcs[Idx + 1];
      // Create the intermediate vector op.
      Register Res =
          MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
      PartialRdxs.push_back(Res);
    }
    SplitSrcs = std::move(PartialRdxs);
  }
  // Finally generate the requested NarrowTy based reduction.
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(SplitSrcs[0]);
  Observer.changedInstr(MI);
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy,
                                             const LLT AmtTy) {
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  if (Amt.isZero()) {
    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
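  // Illustrative (G_SHL with Amt < NVTBits): the double-width shift
  // decomposes as
  //   Lo = InL << Amt
  //   Hi = (InH << Amt) | (InL >> (NVTBits - Amt))
  // e.g. an s64 shift by 8 performed on s32 halves.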
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
  MI.eraseFromParent();

  return Legalized;
}
// TODO: Optimize if constant shift amount.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
    return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
                                       ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
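  // Illustrative (G_SHL): with IsShort = (Amt < NewBitSize), the selects below
  // compute
  //   Lo = IsShort ? (InL << Amt) : 0
  //   Hi = IsShort ? (InH << Amt) | (InL >> (NewBitSize - Amt))
  //                : (InL << (Amt - NewBitSize))
  // with an extra IsZero select to pass InH through unchanged when Amt == 0.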
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0);         // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess});  // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, I);
  }

  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);
  Observer.changedInstr(MI);
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_STRICT_FSUB:
  case TargetOpcode::G_STRICT_FMUL: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FSHR:
  case TargetOpcode::G_FSHL: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT: {
    Register DstReg = MI.getOperand(0).getReg();
    Register CondReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT CondTy = MRI.getType(CondReg);
    if (TypeIdx == 1) {
      if (!CondTy.isScalar() ||
          DstTy.getElementCount() != MoreTy.getElementCount())
        return UnableToLegalize;

      // This is turning a scalar select of vectors into a vector
      // select. Broadcast the select condition.
      auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ShufSplat.getReg(0));
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (CondTy.isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_UNMERGE_VALUES:
    return UnableToLegalize;
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
  case TargetOpcode::G_BUILD_VECTOR: {
    SmallVector<SrcOp, 8> Elts;
    for (auto Op : MI.uses()) {
      Elts.push_back(Op.getReg());
    }

    for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
      Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
    }

    MIRBuilder.buildDeleteTrailingVectorElements(
        MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_TRUNC: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  default:
    return UnableToLegalize;
  }
}
/// Expand source vectors to the size of the destination vector.
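/// Illustrative: for a 4-element mask over <2 x s32> sources, each source is
/// concatenated with an undef <2 x s32> to form <4 x s32>, and mask indices
/// that referred to the second source are shifted up by the padding amount.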
static LegalizerHelper::LegalizeResult
equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  unsigned MaskNumElts = Mask.size();
  unsigned SrcNumElts = SrcTy.getNumElements();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DestEltTy = DstTy.getElementType();

  // TODO: Normalize the shuffle vector since mask and vector length don't
  // match.
  if (MaskNumElts <= SrcNumElts) {
    return LegalizerHelper::LegalizeResult::UnableToLegalize;
  }

  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
  LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);

  // Create new source vectors by concatenating the initial
  // source vectors with undefined vectors of the same size.
  auto Undef = MIRBuilder.buildUndef(SrcTy);
  SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
  SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
  MOps1[0] = MI.getOperand(1).getReg();
  MOps2[0] = MI.getOperand(2).getReg();

  auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
  auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);

  // Readjust mask for new input vector length.
  SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
  for (unsigned I = 0; I != MaskNumElts; ++I) {
    int Idx = Mask[I];
    if (Idx >= static_cast<int>(SrcNumElts))
      Idx += PaddedMaskNumElts - SrcNumElts;
    MappedOps[I] = Idx;
  }

  // If we got more elements than required, extract subvector.
  if (MaskNumElts != PaddedMaskNumElts) {
    auto Shuffle =
        MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);

    SmallVector<Register, 16> Elts(MaskNumElts);
    for (unsigned I = 0; I < MaskNumElts; ++I) {
      Elts[I] =
          MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
              .getReg(0);
    }
    MIRBuilder.buildBuildVector(DstReg, Elts);
  } else {
    MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
  }

  MI.eraseFromParent();
  return LegalizerHelper::LegalizeResult::Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
                                           unsigned int TypeIdx, LLT MoreTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  LLT DstTy = MRI.getType(DstReg);
  LLT Src1Ty = MRI.getType(Src1Reg);
  LLT Src2Ty = MRI.getType(Src2Reg);
  unsigned NumElts = DstTy.getNumElements();
  unsigned WidenNumElts = MoreTy.getNumElements();

  if (DstTy.isVector() && Src1Ty.isVector() &&
      DstTy.getNumElements() > Src1Ty.getNumElements()) {
    return equalizeVectorShuffleLengths(MI, MIRBuilder);
  }

  if (TypeIdx != 0)
    return UnableToLegalize;

  // Expect a canonicalized shuffle.
  if (DstTy != Src1Ty || DstTy != Src2Ty)
    return UnableToLegalize;

  moreElementsVectorSrc(MI, MoreTy, 1);
  moreElementsVectorSrc(MI, MoreTy, 2);

  // Adjust mask based on new input vector length.
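  // Illustrative: widening <2 x sN> operands to <4 x sN> remaps an old mask
  // index of 2 (the first element of the second source) to 2 - 2 + 4 = 4, and
  // the tail of the mask is padded with -1 (undef).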
  SmallVector<int, 16> NewMask;
  for (unsigned I = 0; I != NumElts; ++I) {
    int Idx = Mask[I];
    if (Idx < static_cast<int>(NumElts))
      NewMask.push_back(Idx);
    else
      NewMask.push_back(Idx - NumElts + WidenNumElts);
  }
  for (unsigned I = NumElts; I != WidenNumElts; ++I)
    NewMask.push_back(-1);
  moreElementsVectorDst(MI, MoreTy, 0);
  MIRBuilder.setInstrAndDebugLoc(MI);
  MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
                                MI.getOperand(1).getReg(),
                                MI.getOperand(2).getReg(), NewMask);
  MI.eraseFromParent();
  return Legalized;
}
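
// Multi-part multiplication: the grade-school algorithm on NarrowTy-sized
// limbs. Illustrative for 2x2 parts (truncating result):
//   Dst[0] = mul(Src1[0], Src2[0])
//   Dst[1] = mul(Src1[1], Src2[0]) + mul(Src1[0], Src2[1])
//            + umulh(Src1[0], Src2[0])
// When the result has more than two parts, carries from the intermediate
// additions are accumulated and fed into the next limb.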
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since the value for the next index is not calculated, neither is
      // CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
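
// As a rough illustration (added commentary), narrowing a G_UADDO of s64 to
// s32 parts yields a carry chain of the form
//   %lo:_(s32), %c:_(s1) = G_UADDO %src1_lo, %src2_lo
//   %hi:_(s32), %carry_out:_(s1) = G_UADDE %src1_hi, %src2_hi, %c
// with the final carry-out forwarded to the original carry def. For the
// signed-overflow variants only the topmost part uses the signed opcode
// (OpF), since overflow is decided by the sign-carrying piece.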
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstType = MRI.getType(DstReg);
  // FIXME: add support for vector types
  if (DstType.isVector())
    return UnableToLegalize;

  unsigned Opcode = MI.getOpcode();
  unsigned OpO, OpE, OpF;
  switch (Opcode) {
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_ADD:
    OpO = TargetOpcode::G_UADDO;
    OpE = TargetOpcode::G_UADDE;
    OpF = TargetOpcode::G_UADDE;
    if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
      OpF = TargetOpcode::G_SADDE;
    break;
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SUB:
    OpO = TargetOpcode::G_USUBO;
    OpE = TargetOpcode::G_USUBE;
    OpF = TargetOpcode::G_USUBE;
    if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
      OpF = TargetOpcode::G_SSUBE;
    break;
  default:
    llvm_unreachable("Unexpected add/sub opcode!");
  }

  // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
  unsigned NumDefs = MI.getNumExplicitDefs();
  Register Src1 = MI.getOperand(NumDefs).getReg();
  Register Src2 = MI.getOperand(NumDefs + 1).getReg();
  Register CarryDst, CarryIn;
  if (NumDefs == 2)
    CarryDst = MI.getOperand(1).getReg();
  if (MI.getNumOperands() == NumDefs + 3)
    CarryIn = MI.getOperand(NumDefs + 2).getReg();

  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
  LLT LeftoverTy, DummyTy;
  SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
  extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
  extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);

  int NarrowParts = Src1Regs.size();
  for (int I = 0, E = Src1Left.size(); I != E; ++I) {
    Src1Regs.push_back(Src1Left[I]);
    Src2Regs.push_back(Src2Left[I]);
  }
  DstRegs.reserve(Src1Regs.size());

  for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
    Register DstReg =
        MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
    Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    // Forward the final carry-out to the destination register.
    if (i == e - 1 && CarryDst)
      CarryOut = CarryDst;

    if (!CarryIn) {
      MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i]});
    } else if (i == e - 1) {
      MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i], CarryIn});
    } else {
      MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
                            {Src1Regs[i], Src2Regs[i], CarryIn});
    }

    DstRegs.push_back(DstReg);
    CarryIn = CarryOut;
  }
  insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
              ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
              ArrayRef(DstRegs).drop_front(NarrowParts));

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1 = MI.getOperand(1).getReg();
  Register Src2 = MI.getOperand(2).getReg();

  LLT Ty = MRI.getType(DstReg);
  if (Ty.isVector())
    return UnableToLegalize;

  unsigned Size = Ty.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  if (Size % NarrowSize != 0)
    return UnableToLegalize;

  unsigned NumParts = Size / NarrowSize;
  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
  unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);

  SmallVector<Register, 2> Src1Parts, Src2Parts;
  SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
  extractParts(Src1, NarrowTy, NumParts, Src1Parts);
  extractParts(Src2, NarrowTy, NumParts, Src2Parts);
  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);

  // Take only the high half of the registers if this is a high multiply.
  ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
  MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;

  Register Src = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src);

  // If all finite floats fit into the narrowed integer type, we can just swap
  // out the result type. This is practically only useful for conversions from
  // half to at least 16-bits, so just handle the one case.
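  // (For reference: the largest finite half value is 65504, which fits in 16
  // unsigned bits but needs 17 bits once a sign bit is involved, hence the
  // bound below.)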
  if (SrcTy.getScalarType() != LLT::scalar(16) ||
      NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
    return UnableToLegalize;

  Observer.changingInstr(MI);
  narrowScalarDst(MI, NarrowTy, 0,
                  IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
  Observer.changedInstr(MI);
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // Compute the offset and size of the segment this source part contributes,
    // as if the extracted range extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else if (DstRegs.size() > 1)
    MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
  else
    MIRBuilder.buildCopy(DstReg, DstRegs[0]);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
  LLT LeftoverTy;
  extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
               LeftoverRegs);

  for (Register Reg : LeftoverRegs)
    SrcRegs.push_back(Reg);

  uint64_t NarrowSize = NarrowTy.getSizeInBits();
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
    unsigned DstStart = I * NarrowSize;

    if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    Register SrcReg = SrcRegs[I];
    if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
      // The leftover reg is smaller than NarrowTy, so we need to extend it.
      SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
    }

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcReg);
      continue;
    }

    // Compute the offset and size of the segment of OpReg that lands in this
    // destination part, as if the inserted range extended infinitely in both
    // directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  uint64_t WideSize = DstRegs.size() * NarrowSize;
  Register DstReg = MI.getOperand(0).getReg();
  if (WideSize > RegTy.getSizeInBits()) {
    Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
    MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
    MIRBuilder.buildTrunc(DstReg, MergeReg);
  } else
    MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                      {Src0Regs[I], Src1Regs[I]});
    DstRegs.push_back(Inst.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(
        MI.getOpcode(),
        {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
                                 LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  SmallVector<Register, 8> Parts;
  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register CondReg = MI.getOperand(1).getReg();
  LLT CondTy = MRI.getType(CondReg);
  if (CondTy.isVector()) // TODO: Handle vselect
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs))
    return UnableToLegalize;

  LLT Unused;
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(NarrowTy,
                                         CondReg, Src1Regs[I], Src2Regs[I]);
    DstRegs.push_back(Select.getReg(0));
  }

  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(
        LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
    DstLeftoverRegs.push_back(Select.getReg(0));
  }

  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
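    // Worked example, with NarrowSize = 32: for the s64 value
    // 0x00000000'00F00000 the high half is zero, so the result is
    // 32 + ctlz(0x00F00000) = 32 + 8 = 40.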
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(1), C_0);
    auto LoCTLZ = IsUndef ?
      B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
      B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;

    MachineIRBuilder &B = MIRBuilder;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
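    // Worked example, with NarrowSize = 32: for the s64 value
    // 0x00000001'00000000 the low half is zero, so the result is
    // cttz(0x00000001) + 32 = 0 + 32 = 32.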
    auto C_0 = B.buildConstant(NarrowTy, 0);
    auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                UnmergeSrc.getReg(0), C_0);
    auto HiCTTZ = IsUndef ?
      B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
      B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
    auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));

    auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
    auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
    MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const auto &TII = MIRBuilder.getTII();
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // For now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >> 16);
    // x = x | (x >> 32); // for 64-bit input
    // ... up to a shift of NewLen / 2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
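    // For a 32-bit input this emits five or+lshr steps (shift amounts 1, 2,
    // 4, 8, 16) that smear the leading set bit into every position below it;
    // Len minus the popcount of that mask is then the leading zero count.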
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // For now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x - 1)); }
    // Ref: "Hacker's Delight" by Henry Warren
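    // Worked example: for x = 0b10101000, ~x = 0b01010111 and x - 1 =
    // 0b10100111, so ~x & (x - 1) = 0b00000111, whose popcount (3) is exactly
    // the number of trailing zeros of x.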
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. The default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // but we use the following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives the same result in blocks of 2 with one instruction fewer.
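    // (Per 2-bit block v in {0b00, 0b01, 0b10, 0b11}, v - (v >> 1) yields
    // 0, 1, 1, 2 respectively, which is exactly the per-block popcount.)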
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // To get the count in blocks of 4, add the values from adjacent blocks
    // of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For the count in blocks of 8 bits we don't have to mask the high 4 bits
    // before the addition, since the count value sits in the range {0,...,8}
    // and 4 bits are enough to hold such binary values. After the addition the
    // high 4 bits still hold the count of set bits in the high 4-bit block;
    // set them to zero to get the 8-bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold the CTPOP result of a 128-bit int or smaller. A multiply
    // with this bitmask sets the 8 most significant bits of ResTmp to the sum
    // of all the B8Counts in the 8-bit blocks.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);

    // Shift the count result from the 8 high bits down to the low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
    MI.eraseFromParent();
    return Legalized;
  }
  }
}

// Check that (every element of) Reg is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
                                        Register Reg, unsigned BW) {
  return matchUnaryPredicate(
      MRI, Reg,
      [=](const Constant *C) {
        // Null constant here means an undef.
        const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
        return !CI || CI->getValue().urem(BW) != 0;
      },
      /*AllowUndefs*/ true);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  unsigned BW = Ty.getScalarSizeInBits();

  if (!isPowerOf2_32(BW))
    return UnableToLegalize;

  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl X, Y, Z -> fshr X, Y, -Z
    // fshr X, Y, Z -> fshl X, Y, -Z
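    // (The shift amount is interpreted modulo BW, and Z % BW != 0 here, so
    // -Z % BW == BW - (Z % BW), the complementary amount for the reversed
    // opcode.)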
    auto Zero = MIRBuilder.buildConstant(ShTy, 0);
    Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
  } else {
    // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
    // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
    } else {
      X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
    }

    Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
  }

  MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  const unsigned BW = Ty.getScalarSizeInBits();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;

  Register ShX, ShY;
  Register ShAmt, InvShAmt;

  // FIXME: Emit optimized urem by constant instead of letting it expand later.
  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
    ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
    InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
    ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
    ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      auto NotZ = MIRBuilder.buildNot(ShTy, Z);
      InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
    } else {
      auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
      ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
      InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
    }

    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
      auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
      ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
    } else {
      auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
      ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
      ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
    }
  }

  MIRBuilder.buildOr(Dst, ShX, ShY);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
  // These operations approximately do the following (while avoiding undefined
  // shifts by BW):
  // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(MI.getOperand(3).getReg());

  bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  // TODO: Use smarter heuristic that accounts for vector legalization.
  if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
    return lowerFunnelShiftAsShifts(MI);

  // The inverse expansion only works for power-of-2 bit widths; fall back to
  // shifts if it fails.
  LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
  if (Result == UnableToLegalize)
    return lowerFunnelShiftAsShifts(MI);
  return Result;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Amt = MI.getOperand(2).getReg();
  LLT AmtTy = MRI.getType(Amt);
  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
  MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Amt = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  LLT AmtTy = MRI.getType(Amt);

  unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;

  MIRBuilder.setInstrAndDebugLoc(MI);

  // If a rotate in the other direction is supported, use it.
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
      isPowerOf2_32(EltSizeInBits))
    return lowerRotateWithReverseRotate(MI);

  // If a funnel shift is supported, use it.
  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  bool IsFShLegal = false;
  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
    auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
                                Register R3) {
      MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
      MI.eraseFromParent();
      return Legalized;
    };
    // Use the same-direction funnel shift directly if it's legal; otherwise
    // negate the amount and use the funnel shift in the other direction.
    if (IsFShLegal) {
      return buildFunnelShift(FShOpc, Dst, Src, Amt);
    } else if (isPowerOf2_32(EltSizeInBits)) {
      Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
      return buildFunnelShift(RevFsh, Dst, Src, Amt);
    }
  }

  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
  unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
  auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
  Register ShVal;
  Register RevShiftVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
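    // For example, (rotl x, 1) on s8 becomes
    // (x << (1 & 7)) | (x >> (-1 & 7)), i.e. (x << 1) | (x >> 7).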
    auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
    auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
    auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
    auto One = MIRBuilder.buildConstant(AmtTy, 1);
    auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
  }
  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
  MI.eraseFromParent();
  return Legalized;
}

// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (SrcTy == LLT::scalar(1)) {
    auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != LLT::scalar(64))
    return UnableToLegalize;

  if (DstTy == LLT::scalar(32)) {
    // TODO: SelectionDAG has several alternative expansions to port which may
    // be more reasonable depending on the available instructions. If a target
    // has sitofp, does not have CTLZ, or can efficiently use f64 as an
    // intermediate type, this is probably worse.
    return lowerU64ToF32BitOps(MI);
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  if (SrcTy == S1) {
    auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy != S64)
    return UnableToLegalize;

  if (DstTy == S32) {
    // signed cl2f(long l) {
    //   long s = l >> 63;
    //   float r = cul2f((l + s) ^ s);
    //   return s ? -r : r;
    // }
    Register L = Src;
    auto SignBit = MIRBuilder.buildConstant(S64, 63);
    auto S = MIRBuilder.buildAShr(S64, L, SignBit);

    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
    auto R = MIRBuilder.buildUITOFP(S32, Xor);

    auto RNeg = MIRBuilder.buildFNeg(S32, R);
    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
                                            MIRBuilder.buildConstant(S64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives the same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater than or equal to 2^31 for float or 2^63 for double. For brevity,
  // call this bound 2^Exp.
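  // In pseudocode, the lowering below computes
  //   Dst = Src < 2^Exp ? fptosi(Src) : fptosi(Src - 2^Exp) ^ 2^Exp
  // where the final xor with 2^Exp (the sign mask) re-adds the subtracted
  // bias by setting the top bit of the result.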
  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getZero(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);

  // For fp values greater than or equal to the threshold (2^Exp), use FPTOSI
  // on (Value - 2^Exp) and add 2^Exp back by setting the highest bit in the
  // result to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion.
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // (M != 0 ? 0x0200 : 0) | 0x7c00;
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // N = M | (E << 12);
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // B = clamp(1 - E, 0, 13);
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit.
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S16 = LLT::scalar(16);

  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
    return lowerFPTRUNC_F64_TO_F16(MI);

  return UnableToLegalize;
}

// TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
// multiplication tree.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
  MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_SMIN:
    return CmpInst::ICMP_SLT;
  case TargetOpcode::G_SMAX:
    return CmpInst::ICMP_SGT;
  case TargetOpcode::G_UMIN:
    return CmpInst::ICMP_ULT;
  case TargetOpcode::G_UMAX:
    return CmpInst::ICMP_UGT;
  default:
    llvm_unreachable("not in integer min/max");
  }
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
  LLT CmpType = MRI.getType(Dst).changeElementSize(1);

  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const LLT Src0Ty = MRI.getType(Src0);
  const LLT Src1Ty = MRI.getType(Src1);

  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();

  auto SignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getSignMask(Src0Size));

  auto NotSignBitMask = MIRBuilder.buildConstant(
      Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
  Register And1;
  if (Src0Ty == Src1Ty) {
    And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
  } else if (Src0Size > Src1Size) {
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
  } else {
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  unsigned Flags = MI.getFlags();
  MIRBuilder.buildOr(Dst, And0, And1, Flags);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.
    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicated quiet-snan instruction, as we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!isKnownNeverSNaN(Src0, MRI))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!isKnownNeverSNaN(Src1, MRI))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
  // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  unsigned Flags = MI.getFlags();

  auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
                                  Flags);
  MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // round(x) =>
  //  t = trunc(x);
  //  d = fabs(x - t);
  //  o = copysign(1.0f, x);
  //  return t + (d >= 0.5 ? o : 0.0);
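  //
  // This rounds half away from zero. E.g. for x = -2.5: t = -2.0, d = 0.5,
  // o = -1.0, and since d >= 0.5 the result is t + o = -3.0.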
  auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
  auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
  auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);

  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
  auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
  auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);

  auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
                                  Flags);
  auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);

  MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFFloor(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  unsigned Flags = MI.getFlags();
  LLT Ty = MRI.getType(DstReg);
  const LLT CondTy = Ty.changeElementSize(1);

  // result = trunc(src);
  // if (src < 0.0 && src != result)
  //   result += -1.0.
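  //
  // The conditional add is done without a select: the s1 condition is fed
  // through G_SITOFP, so true (which sign-extends to -1) becomes -1.0 and
  // false becomes 0.0, which is then unconditionally added to the truncated
  // value.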
  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);

  auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
                                  SrcReg, Zero, Flags);
  auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
                                      SrcReg, Trunc, Flags);
  auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);

  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
  MI.eraseFromParent();
  return Legalized;
}
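
// Lower G_MERGE_VALUES to a chain of zext/shift/or on a wide integer. E.g.
// merging four s16 parts a, b, c, d into an s64 produces:
//   zext(a) | (zext(b) << 16) | (zext(c) << 32) | (zext(d) << 48).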
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  const unsigned NumOps = MI.getNumOperands();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);
  unsigned PartSize = SrcTy.getSizeInBits();

  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  if (DstTy.isPointer()) {
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
          DstTy.getAddressSpace())) {
      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
      return UnableToLegalize;
    }

    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
  }

  MI.eraseFromParent();
  return Legalized;
}
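
// Lower G_UNMERGE_VALUES by treating the source as a wide integer and peeling
// off each destination with a right shift and truncate. E.g. unmerging an s64
// into four s16 parts produces trunc(x), trunc(x >> 16), trunc(x >> 32) and
// trunc(x >> 48).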
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (DstTy.isPointer())
    return UnableToLegalize; // TODO

  SrcReg = coerceToScalar(SrcReg);
  if (!SrcReg)
    return UnableToLegalize;

  // Expand scalarizing unmerge as bitcast to integer and shift.
  LLT IntTy = MRI.getType(SrcReg);

  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  unsigned Offset = DstSize;
  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
    auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
    MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
  }

  MI.eraseFromParent();
  return Legalized;
}

/// Lower a vector extract or insert by writing the vector to a stack temporary
/// and reloading the element or vector.
///
/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
///  =>
///  %stack_temp = G_FRAME_INDEX
///  G_STORE %vec, %stack_temp
///  %idx = clamp(%idx, %vec.getNumElements())
///  %element_ptr = G_PTR_ADD %stack_temp, %idx
///  %dst = G_LOAD %element_ptr
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  unsigned NumElts = VecTy.getNumElements();

  int64_t IdxVal;
  // If the index is a known in-bounds constant, avoid the stack temporary
  // entirely and operate on the unmerged source elements directly. Note the
  // bound must be strict (IdxVal < NumElts), since IdxVal indexes SrcRegs.
  if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal < NumElts) {
    SmallVector<Register, 8> SrcRegs;
    extractParts(SrcVec, EltTy, NumElts, SrcRegs);

    if (InsertVal) {
      SrcRegs[IdxVal] = MI.getOperand(2).getReg();
      MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
    } else {
      MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
    }

    MI.eraseFromParent();
    return Legalized;
  }
  if (!EltTy.isByteSized()) { // Not implemented.
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
                                        VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // We lose information with a variable offset.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Write the inserted element.
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);

    // Reload the whole vector.
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}
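
// Lower G_SHUFFLE_VECTOR by extracting every source element selected by the
// mask and rebuilding the result with G_BUILD_VECTOR. Negative mask entries
// become undef elements.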
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}
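
// Lower G_DYN_STACKALLOC for a downward-growing stack: copy SP, subtract the
// allocation size in the integer domain, align the result down, and write it
// back to SP. E.g. for a 16-byte alignment the result is ANDed with -16
// (i.e. ~15) to round down to an aligned address.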
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  LLT PtrTy = MRI.getType(Dst);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Alignment > Align(1)) {
    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  unsigned Offset = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  // Extract sub-vector or one element.
  if (SrcTy.isVector()) {
    unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
    unsigned DstSize = DstTy.getSizeInBits();

    if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
        (Offset + DstSize <= SrcTy.getSizeInBits())) {
      // Unmerge and allow access to each Src element for the artifact
      // combiner.
      auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src);

      // Take the element(s) we need to extract and copy it (merge them).
      SmallVector<Register, 8> SubVectorElts;
      for (unsigned Idx = Offset / SrcEltSize;
           Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
        SubVectorElts.push_back(Unmerge.getReg(Idx));
      }
      if (SubVectorElts.size() == 1)
        MIRBuilder.buildCopy(Dst, SubVectorElts[0]);
      else
        MIRBuilder.buildMergeLikeInstr(Dst, SubVectorElts);

      MI.eraseFromParent();
      return Legalized;
    }
  }

  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
    }

    if (Offset == 0)
      MIRBuilder.buildTrunc(Dst, Src);
    else {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
      MIRBuilder.buildTrunc(Dst, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
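
// Lower G_INSERT. For vectors this goes through unmerge/merge; for scalars
// the inserted value is zero-extended, shifted into position, and ORed into
// the destination after the target bit range has been cleared with a mask.
// E.g. inserting an s8 at offset 8 of an s32 x:
//   (x & 0xffff00ff) | (zext(val) << 8).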
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  // Insert sub-vector or one element.
  if (DstTy.isVector() && !InsertTy.isPointer()) {
    LLT EltTy = DstTy.getElementType();
    unsigned EltSize = EltTy.getSizeInBits();
    unsigned InsertSize = InsertTy.getSizeInBits();

    if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
        (Offset + InsertSize <= DstTy.getSizeInBits())) {
      auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
      SmallVector<Register, 8> DstElts;
      unsigned Idx = 0;
      // Elements from Src before the insert start Offset.
      for (; Idx < Offset / EltSize; ++Idx) {
        DstElts.push_back(UnmergeSrc.getReg(Idx));
      }

      // Replace elements in Src with elements from InsertSrc.
      if (InsertTy.getSizeInBits() > EltSize) {
        auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
        for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
             ++Idx, ++i) {
          DstElts.push_back(UnmergeInsertSrc.getReg(i));
        }
      } else {
        DstElts.push_back(InsertSrc);
        ++Idx;
      }

      // Remaining elements from Src after the insert.
      for (; Idx < DstTy.getNumElements(); ++Idx) {
        DstElts.push_back(UnmergeSrc.getReg(Idx));
      }

      MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
      MI.eraseFromParent();
      return Legalized;
    }
  }

  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  LLT IntDstTy = DstTy;

  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}
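
// Lower G_SADDO/G_SSUBO. For addition, signed overflow occurred iff
// (result < LHS) differs from (RHS < 0); subtraction uses (RHS > 0) instead.
// E.g. for s32 INT_MAX + 1 the result wraps to INT_MIN, so result < LHS is
// true while RHS < 0 is false, and their XOR reports the overflow.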
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  Register Dst0 = MI.getOperand(0).getReg();
  Register Dst1 = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;

  LLT Ty = MRI.getType(Dst0);
  LLT BoolTy = MRI.getType(Dst1);

  if (IsAdd)
    MIRBuilder.buildAdd(Dst0, LHS, RHS);
  else
    MIRBuilder.buildSub(Dst0, LHS, RHS);

  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.

  auto Zero = MIRBuilder.buildConstant(Ty, 0);

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  auto ResultLowerThanLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
  auto ConditionRHS = MIRBuilder.buildICmp(
      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);

  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
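    //
    // E.g. sadd.sat on s8 with a = b = 100: hi = 127 - smax(100, 0) = 27 and
    // lo = -128 - smin(100, 0) = -128, so b is clamped to 27 and the result
    // is 100 + 27 = 127, the saturated value.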
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));

    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal,
                               MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal,
                               MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }

    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);
  bool IsSigned;
  bool IsAdd;
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_UADDO;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_SADDO;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_USUBO;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_SSUBO;
    break;
  }

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MachineInstrBuilder Clamp;
  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   {tmp, ov} = saddo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : tmp
    // ssub.sat(a, b) ->
    //   {tmp, ov} = ssubo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : tmp
    //
    // On overflow, the wrapped result has the wrong sign, so shifting its
    // sign bit across the value and adding INT_MIN yields INT_MAX for
    // positive overflow and INT_MIN for negative overflow.
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  } else {
    // uadd.sat(a, b) ->
    //   {tmp, ov} = uaddo(a, b)
    //   ov ? 0xffffffff : tmp
    // usub.sat(a, b) ->
    //   {tmp, ov} = usubo(a, b)
    //   ov ? 0 : tmp
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  }
  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);

  MI.eraseFromParent();
  return Legalized;
}
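
// Lower G_SSHLSAT/G_USHLSAT by shifting, shifting back with the inverse
// shift, and comparing against the original operand: any mismatch means bits
// were shifted out, so the saturated value is selected instead. E.g.
// ushl.sat on s8 of 0x40 << 2 wraps to 0x00; shifting back gives 0x00 != 0x40,
// so the result saturates to 0xff.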
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShlSat(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1);

  unsigned BW = Ty.getScalarSizeInBits();
  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
                       : MIRBuilder.buildLShr(Ty, Result, RHS);

  MachineInstrBuilder SatVal;
  if (IsSigned) {
    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
    auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
                                    MIRBuilder.buildConstant(Ty, 0));
    SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
  } else {
    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
  }
  auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;

  // Swap most and least significant byte, set remaining bytes in Res to zero.
  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);

  // Set i-th high/low byte in Res to i-th low/high byte from Src.
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
    // Use a 64-bit shift so the mask stays correct for byte indexes >= 4.
    APInt APMask(SizeInBytes * 8, uint64_t(0xFF) << (i * 8));
    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
    // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
    // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
  }
  Res.getInstr()->getOperand(0).setReg(Dst);

  MI.eraseFromParent();
  return Legalized;
}

// { (Src & Mask) >> N } | { (Src << N) & Mask }
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
                                 MachineInstrBuilder Src, APInt Mask) {
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  MachineInstrBuilder C_N = B.buildConstant(Ty, N);
  MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
  return B.buildOr(Dst, LHS, RHS);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getSizeInBits();

  MachineInstrBuilder BSWAP =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
  //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
  MachineInstrBuilder Swap4 =
      SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //    [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
  MachineInstrBuilder Swap2 =
      SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //    [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
  SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));

  MI.eraseFromParent();
  return Legalized;
}
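
// Lower G_READ_REGISTER/G_WRITE_REGISTER to a plain COPY from/to the named
// physical register. Fails if the target does not recognize the register
// name given in the metadata operand.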
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
  MachineFunction &MF = MIRBuilder.getMF();
  bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  int NameOpIdx = IsRead ? 1 : 0;
  int ValRegIndex = IsRead ? 0 : 1;

  Register ValReg = MI.getOperand(ValRegIndex).getReg();
  const LLT Ty = MRI.getType(ValReg);
  const MDString *RegStr = cast<MDString>(
    cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));

  Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
  if (!PhysReg.isValid())
    return UnableToLegalize;

  if (IsRead)
    MIRBuilder.buildCopy(ValReg, PhysReg);
  else
    MIRBuilder.buildCopy(PhysReg, ValReg);

  MI.eraseFromParent();
  return Legalized;
}
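
// Lower G_SMULH/G_UMULH by extending both operands to twice the width,
// multiplying, and shifting the product right by the original bit width to
// recover the high half. E.g. for an s32 G_UMULH this is
// trunc((zext(a) * zext(b)) >> 32), with sext and an arithmetic shift for the
// signed case.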
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Register Result = MI.getOperand(0).getReg();
  LLT OrigTy = MRI.getType(Result);
  auto SizeInBits = OrigTy.getScalarSizeInBits();
  LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);

  auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
  auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
  auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
  unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;

  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
  auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
  MIRBuilder.buildTrunc(Result, Shifted);

  MI.eraseFromParent();
  return Legalized;
}
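
// Lower G_IS_FPCLASS by reinterpreting the value as an integer and testing
// the IEEE-754 bit fields directly: the sign bit is stripped to form abs(V),
// which is then compared against the exponent/mantissa masks appropriate for
// each requested class.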
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  uint64_t Mask = MI.getOperand(2).getImm();

  if (Mask == 0) {
    MIRBuilder.buildConstant(DstReg, 0);
    MI.eraseFromParent();
    return Legalized;
  }
  if ((Mask & fcAllFlags) == fcAllFlags) {
    MIRBuilder.buildConstant(DstReg, 1);
    MI.eraseFromParent();
    return Legalized;
  }

  unsigned BitSize = SrcTy.getScalarSizeInBits();
  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());

  LLT IntTy = LLT::scalar(BitSize);
  if (SrcTy.isVector())
    IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
  auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  APInt ExpMask = Inf;
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());

  auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
  auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
  auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
  auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
  auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);

  auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
  auto Sign =
      MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);

  auto Res = MIRBuilder.buildConstant(DstTy, 0);
  const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
    Res = MIRBuilder.buildOr(DstTy, Res, ToAppend);
  };

  // Tests that involve more than one class should be processed first.
  if ((Mask & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) u< exp_mask
    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
                                     ExpMaskC));
    Mask &= ~fcFinite;
  } else if ((Mask & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V u< exp_mask
    appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
                                     ExpMaskC));
    Mask &= ~fcPosFinite;
  } else if ((Mask & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
    auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
                                    ExpMaskC);
    auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
    appendToRes(And);
    Mask &= ~fcNegFinite;
  }

  // Check for individual classes.
  if (unsigned PartialCheck = Mask & fcZero) {
    if (PartialCheck == fcPosZero)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, ZeroC));
    else if (PartialCheck == fcZero)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
    else // fcNegZero
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, SignBitC));
  }

  if (unsigned PartialCheck = Mask & fcInf) {
    if (PartialCheck == fcPosInf)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, InfC));
    else if (PartialCheck == fcInf)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
    else { // fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
                                       AsInt, NegInfC));
    }
  }

  if (unsigned PartialCheck = Mask & fcNan) {
    auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) u> int(inf)
      appendToRes(
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
      appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
                                       InfWithQnanBitC));
    } else { // fcSNan
      // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
      //                    abs(V) u< (unsigned(Inf) | quiet_bit)
      auto IsNan =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
      auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
                                            Abs, InfWithQnanBitC);
      appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
    }
  }

  if (unsigned PartialCheck = Mask & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
    // issubnormal(V) && V > 0 ==> unsigned(V - 1) u< (all mantissa bits set)
    auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
    auto OneC = MIRBuilder.buildConstant(IntTy, 1);
    auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
    auto SubnormalRes =
        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
                             MIRBuilder.buildConstant(IntTy, AllOneMantissa));
    if (PartialCheck == fcNegSubnormal)
      SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
    appendToRes(SubnormalRes);
  }

  if (unsigned PartialCheck = Mask & fcNormal) {
    // isnormal(V) ==> (0 u< exp u< max_exp) ==>
    //                 (unsigned(exp - 1) u< (max_exp - 1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    auto ExpMinusOne = MIRBuilder.buildSub(
        IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
    APInt MaxExpMinusOne = ExpMask - ExpLSB;
    auto NormalRes =
        MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
                             MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
    if (PartialCheck == fcNegNormal)
      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
    else if (PartialCheck == fcPosNormal) {
      auto PosSign = MIRBuilder.buildXor(
          DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
      NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
    }
    appendToRes(NormalRes);
  }

  MIRBuilder.buildCopy(DstReg, Res);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
  // Implement vector G_SELECT in terms of XOR, AND, OR.
  Register DstReg = MI.getOperand(0).getReg();
  Register MaskReg = MI.getOperand(1).getReg();
  Register Op1Reg = MI.getOperand(2).getReg();
  Register Op2Reg = MI.getOperand(3).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT MaskTy = MRI.getType(MaskReg);
  if (!DstTy.isVector())
    return UnableToLegalize;

  bool IsEltPtr = DstTy.getElementType().isPointer();
  if (IsEltPtr) {
    LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
    LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
    Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
    Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
    DstTy = NewTy;
  }

  if (MaskTy.isScalar()) {
    // Turn the scalar condition into a vector condition mask.

    Register MaskElt = MaskReg;

    // The condition was potentially zero extended before, but we want a sign
    // extended boolean.
    if (MaskTy != LLT::scalar(1))
      MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);

    // Continue the sign extension (or truncate) to match the data type.
    MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
                                          MaskElt).getReg(0);

    // Generate a vector splat idiom.
    auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
    MaskReg = ShufSplat.getReg(0);
    MaskTy = DstTy;
  }

  if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
    return UnableToLegalize;
  }

  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
  if (IsEltPtr) {
    auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
    MIRBuilder.buildIntToPtr(DstReg, Or);
  } else {
    MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
  }
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
  // Split DIVREM into individual instructions.
  unsigned Opcode = MI.getOpcode();

  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
                                        : TargetOpcode::G_UDIV,
      {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
                                        : TargetOpcode::G_UREM,
      {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MI.eraseFromParent();
  return Legalized;
}
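
// The add/xor expansion of G_ABS below relies on the arithmetic shift
// producing either all zeros (for non-negative inputs, making both the add
// and the xor no-ops) or all ones. E.g. for %a = -5: %v1 = -1,
// %v2 = -5 + -1 = -6, and -6 ^ -1 = 5.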
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
  // Expand %res = G_ABS %a into:
  // %v1 = G_ASHR %a, scalar_size-1
  // %v2 = G_ADD %a, %v1
  // %res = G_XOR %v2, %v1
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  Register OpReg = MI.getOperand(1).getReg();
  auto ShiftAmt =
      MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
  auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
  auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
  MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
  // Expand %res = G_ABS %a into:
  // %v1 = G_CONSTANT 0
  // %v2 = G_SUB %v1, %a
  // %res = G_SMAX %a, %v2
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
  auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
  MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // The source could be a scalar if the IR type was <1 x sN>.
  if (SrcTy.isScalar()) {
    if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
      return UnableToLegalize; // FIXME: handle extension.
    // This can be just a plain copy.
    Observer.changingInstr(MI);
    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
    Observer.changedInstr(MI);
    return Legalized;
  }
  return UnableToLegalize;
}

static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
  // On Darwin, -Os means optimize for size without hurting performance, so
  // only really optimize for size when -Oz (MinSize) is used.
  if (MF.getTarget().getTargetTriple().isOSDarwin())
    return MF.getFunction().hasMinSize();
  return MF.getFunction().hasOptSize();
}

// Returns a list of types to use for memory op lowering in MemOps. A partial
// port of findOptimalMemOpLowering in TargetLowering.
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                          unsigned Limit, const MemOp &Op,
                                          unsigned DstAS, unsigned SrcAS,
                                          const AttributeList &FuncAttributes,
                                          const TargetLowering &TLI) {
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    Ty = LLT::scalar(64);
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < Ty.getSizeInBytes() &&
             !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
        Ty = LLT::scalar(Ty.getSizeInBytes());
    assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned TySize = Ty.getSizeInBytes();
    while (TySize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      LLT NewTy = Ty;
      // FIXME: check for mem op safety and legality of the types. Not all of
      // SDAGisms map cleanly to GISel concepts.
      if (NewTy.isVector())
        NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
      NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
      unsigned NewTySize = NewTy.getSizeInBytes();
      assert(NewTySize > 0 && "Could not find appropriate type");

      // If the new LLT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
      MVT VT = getMVTForLLT(Ty);
      if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
          TLI.allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}

static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}
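
// E.g. splatting the byte 0xAB into an s32 pattern below: zero-extend to 32
// bits, then multiply by the magic constant 0x01010101 to get 0xABABABAB.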
// Get a vectorized representation of the memset value operand, GISel edition.
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  unsigned NumBits = Ty.getScalarSizeInBits();
  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
  if (!Ty.isVector() && ValVRegAndVal) {
    APInt Scalar = ValVRegAndVal->Value.trunc(8);
    APInt SplatVal = APInt::getSplat(NumBits, Scalar);
    return MIB.buildConstant(Ty, SplatVal).getReg(0);
  }

  // Extend the byte value to the larger type, and then multiply by a magic
  // value 0x010101... in order to replicate it across every byte.
  // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
  if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
    return MIB.buildConstant(Ty, 0).getReg(0);
  }

  LLT ExtType = Ty.getScalarType();
  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
  if (NumBits > 8) {
    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
    auto MagicMI = MIB.buildConstant(ExtType, Magic);
    Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
  }

  // For vector types create a G_BUILD_VECTOR.
  if (Ty.isVector())
    Val = MIB.buildSplatVector(Ty, Val).getReg(0);

  return Val;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
                             uint64_t KnownLen, Align Alignment,
                             bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memset length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();

  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  MachineIRBuilder MIB(MI);
  // Find the largest store and generate the bit pattern for it.
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
      LargestTy = MemOps[i];

  // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return UnableToLegalize;

  // Generate the stores. For each store type in the list, we generate the
  // matching store of that type to the destination address.
  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
      // Issuing an unaligned load / store pair that overlaps with the previous
      // pair. Adjust the offset accordingly.
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }

    // If this store is smaller than the largest store see whether we can get
    // the smaller value for free with a truncate.
    Register Value = MemSetValue;
    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
      MVT VT = getMVTForLLT(Ty);
      MVT LargestVT = getMVTForLLT(LargestTy);
      if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return UnableToLegalize;
    }

    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);

    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }

    MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);

  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Len = MI.getOperand(2).getReg();

  const auto *MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;
  bool IsVolatile = MemOp->isVolatile();

  // See if this is a constant length copy.
  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
  // FIXME: support dynamically sized G_MEMCPY_INLINE
  assert(LenVRegAndVal &&
         "inline memcpy with dynamic size is not yet supported");
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  Align DstAlign = DstMMO.getBaseAlign();
  Align SrcAlign = SrcMMO.getBaseAlign();

  return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
                                   uint64_t KnownLen, Align DstAlign,
                                   Align SrcAlign, bool IsVolatile) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
  return lowerMemcpy(MI, Dst, Src, KnownLen,
                     std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
                     IsVolatile);
}
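
// Note: the inline variant above passes UINT64_MAX as the store limit, so the
// op-count check in findGISelOptimalMemOpLowering never fails and the copy is
// always expanded inline rather than falling back to a libcall.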
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
                             uint64_t KnownLen, uint64_t Limit, Align DstAlign,
                             Align SrcAlign, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memcpy length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align Alignment = std::min(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  // FIXME: infer better src pointer alignment like SelectionDAG does here.
  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
  // if the memcpy is in a tail call position.

  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      IsVolatile),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign.previous();

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Now we need to emit a pair of load and stores for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store to the dest
  // buffer of that value loaded. This can result in a sequence of loads and
  // stores of mixed types, depending on what the target specifies as good
  // types to use.
  unsigned CurrOffset = 0;
  unsigned Size = KnownLen;
  for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;

    // Construct MMOs for the accesses.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    Register Offset;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
                   .getReg(0);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Create the store.
    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
                              uint64_t KnownLen, Align DstAlign, Align SrcAlign,
                              bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memmove!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = std::min(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now do the
  // same thing here.
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      /*IsVolatile*/ true),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign.previous();

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Memmove requires that we perform all of the loads before issuing any of
  // the stores. Apart from that, this loop does essentially the same thing as
  // the memcpy codegen loop above.
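  //
  // Worked example (illustrative): for an overlapping move such as
  // memmove(p + 1, p, 8), interleaving each load with its store, as the
  // memcpy lowering does, would let an earlier store clobber bytes a later
  // load still needs. Loading everything up front makes the expansion correct
  // for any overlap.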
  unsigned CurrOffset = 0;
  SmallVector<Register, 16> LoadVals;
  for (auto CopyTy : MemOps) {
    // Construct MMO for the load.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      auto Offset =
          MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  }

  CurrOffset = 0;
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
    // Now store the values loaded.
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      auto Offset =
          MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
  }
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  const unsigned Opc = MI.getOpcode();
  // This combine is fairly complex so it's not written with a separate
  // matcher function.
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET) &&
         "Expected memcpy-like instruction");

  auto MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;

  Align DstAlign = MemOp->getBaseAlign();
  Align SrcAlign;
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Len = MI.getOperand(2).getReg();

  if (Opc != TargetOpcode::G_MEMSET) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *(++MMOIt);
    SrcAlign = MemOp->getBaseAlign();
  }

  // See if this is a constant length copy.
  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
  if (!LenVRegAndVal)
    return UnableToLegalize;

  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();

  if (KnownLen == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  bool IsVolatile = MemOp->isVolatile();
  if (Opc == TargetOpcode::G_MEMCPY_INLINE)
    return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                             IsVolatile);

  // Don't try to optimize volatile.
  if (IsVolatile)
    return UnableToLegalize;

  if (MaxLen && KnownLen > MaxLen)
    return UnableToLegalize;

  if (Opc == TargetOpcode::G_MEMCPY) {
    auto &MF = *MI.getParent()->getParent();
    const auto &TLI = *MF.getSubtarget().getTargetLowering();
    bool OptSize = shouldLowerMemFuncForSize(MF);
    uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
    return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
                       IsVolatile);
  }

  if (Opc == TargetOpcode::G_MEMMOVE)
    return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
  if (Opc == TargetOpcode::G_MEMSET)
    return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
  return UnableToLegalize;
}
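
// Usage sketch (illustrative; details vary by target): a target can route
// these opcodes here by marking them as lowered in its LegalizerInfo, e.g.
//   getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).lower();
// LegalizerHelper::lower() then dispatches them to lowerMemCpyFamily(), which
// returns UnableToLegalize when the length is not a known constant (or
// exceeds MaxLen), leaving the target to pick another strategy such as a
// libcall.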