//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
#include <optional>
#include <tuple>

#define DEBUG_TYPE "gi-combiner"

using namespace llvm;
using namespace MIPatternMatch;

// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
                       cl::desc("Force all indexed operations to be "
                                "legal for the GlobalISel combiner"));

CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                               MachineIRBuilder &B, bool IsPreLegalize,
                               GISelKnownBits *KB, MachineDominatorTree *MDT,
                               const LegalizerInfo *LI)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
      MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
      RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
      TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
  (void)this->KB;
}

const TargetLowering &CombinerHelper::getTargetLowering() const {
  return *Builder.getMF().getSubtarget().getTargetLowering();
}

/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return I;
}

/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(V);
  auto Ctlz = MIB.buildCTLZ(Ty, V);
  auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
  return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
}
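
// For illustration of buildLogBase2 (values are arbitrary): for an s32 value
// V = 8, ctlz(8) = 28, so the emitted sequence computes (32 - 1) - 28 = 3,
// i.e. log2(8).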

/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return ByteWidth - I - 1;
}

/// Given a map from byte offsets in memory to indices in a load/store,
/// determine if that map corresponds to a little or big endian byte pattern.
///
/// \param MemOffset2Idx maps memory offsets to address offsets.
/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
///
/// \returns true if the map corresponds to a big endian byte pattern, false if
/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
///
/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
/// are as follows:
///
/// AddrOffset   Little endian    Big endian
///     0              0               3
///     1              1               2
///     2              2               1
///     3              3               0
static std::optional<bool>
isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
            int64_t LowestIdx) {
  // Need at least two byte positions to decide on endianness.
  unsigned Width = MemOffset2Idx.size();
  if (Width < 2)
    return std::nullopt;
  bool BigEndian = true, LittleEndian = true;
  for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
    auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
    if (MemOffsetAndIdx == MemOffset2Idx.end())
      return std::nullopt;
    const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
    assert(Idx >= 0 && "Expected non-negative byte offset?");
    LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
    BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
    if (!BigEndian && !LittleEndian)
      return std::nullopt;
  }
  assert((BigEndian != LittleEndian) &&
         "Pattern cannot be both big and little endian!");
  return BigEndian;
}
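
// For illustration of isBigEndian (an arbitrary 4-byte pattern): with
// LowestIdx = 0, the map {0 -> 0, 1 -> 1, 2 -> 2, 3 -> 3} is classified as
// little endian (returns false), while {0 -> 3, 1 -> 2, 2 -> 1, 3 -> 0} is
// classified as big endian (returns true).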

bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }

bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
  assert(LI && "Must have LegalizerInfo to query isLegal!");
  return LI->getAction(Query).Action == LegalizeActions::Legal;
}

bool CombinerHelper::isLegalOrBeforeLegalizer(
    const LegalityQuery &Query) const {
  return isPreLegalize() || isLegal(Query);
}

bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
  if (!Ty.isVector())
    return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
  // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
  if (isPreLegalize())
    return true;
  LLT EltTy = Ty.getElementType();
  return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
         isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
}
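
// For illustration (register names are arbitrary): a <2 x s32> constant is
// built as
//   %c0:_(s32) = G_CONSTANT i32 1
//   %c1:_(s32) = G_CONSTANT i32 2
//   %v:_(<2 x s32>) = G_BUILD_VECTOR %c0(s32), %c1(s32)
// so after legalization both G_BUILD_VECTOR and the scalar G_CONSTANT must be
// legal for the vector constant to be considered legal.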

void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                                    Register ToReg) const {
  Observer.changingAllUsesOfReg(MRI, FromReg);
  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(ToReg, FromReg);
  Observer.finishedChangingAllUsesOfReg();
}

void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
                                      MachineOperand &FromRegOp,
                                      Register ToReg) const {
  assert(FromRegOp.getParent() && "Expected an operand in an MI");
  Observer.changingInstr(*FromRegOp.getParent());
  FromRegOp.setReg(ToReg);
  Observer.changedInstr(*FromRegOp.getParent());
}

void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
                                       unsigned ToOpcode) const {
  Observer.changingInstr(FromMI);
  FromMI.setDesc(Builder.getTII().get(ToOpcode));
  Observer.changedInstr(FromMI);
}

const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
  return RBI->getRegBank(Reg, MRI, *TRI);
}

void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
  if (RegBank)
    MRI.setRegBank(Reg, *RegBank);
}

bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return canReplaceReg(DstReg, SrcReg, MRI);
}

void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, SrcReg);
}

bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
  bool IsUndef = false;
  SmallVector<Register, 4> Ops;
  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
    applyCombineConcatVectors(MI, IsUndef, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
  IsUndef = true;
  MachineInstr *Undef = nullptr;
  // Walk over all the operands of concat vectors and check if they are
  // build_vector themselves or undef.
  // Then collect their operands in Ops.
  for (const MachineOperand &MO : MI.uses()) {
    Register Reg = MO.getReg();
    MachineInstr *Def = MRI.getVRegDef(Reg);
    assert(Def && "Operand not defined");
    switch (Def->getOpcode()) {
    case TargetOpcode::G_BUILD_VECTOR:
      IsUndef = false;
      // Remember the operands of the build_vector to fold
      // them into the yet-to-build flattened concat vectors.
      for (const MachineOperand &BuildVecMO : Def->uses())
        Ops.push_back(BuildVecMO.getReg());
      break;
    case TargetOpcode::G_IMPLICIT_DEF: {
      LLT OpType = MRI.getType(Reg);
      // Keep one undef value for all the undef operands.
      if (!Undef) {
        Builder.setInsertPt(*MI.getParent(), MI);
        Undef = Builder.buildUndef(OpType.getScalarType());
      }
      assert(MRI.getType(Undef->getOperand(0).getReg()) ==
                 OpType.getScalarType() &&
             "All undefs should have the same type");
      // Break the undef vector in as many scalar elements as needed
      // for the flattening.
      for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
           EltIdx != EltEnd; ++EltIdx)
        Ops.push_back(Undef->getOperand(0).getReg());
      break;
    }
    default:
      return false;
    }
  }
  return true;
}

void CombinerHelper::applyCombineConcatVectors(
    MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
  // Generate the flattened build_vector.
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef. Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up. For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
  if (IsUndef)
    Builder.buildUndef(NewDstReg);
  else
    Builder.buildBuildVector(NewDstReg, Ops);
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}
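
// For illustration of the concat_vectors flattening (register names are
// arbitrary):
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32)
//   %b:_(<2 x s32>) = G_BUILD_VECTOR %z(s32), %w(s32)
//   %d:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
// rewrites to:
//   %d:_(<4 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32), %z(s32), %w(s32)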

bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Invalid instruction kind");
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(Src1);
  // As bizarre as it may look, shuffle vector can actually produce
  // scalar! This is because at the IR level a <1 x ty> shuffle
  // vector is perfectly valid.
  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector with a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  // extract_vector_elt and so on. It is less clear that would
  // be better though, so don't bother for now.
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match
  // we could still emit an extract vector element in that case.
  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
    return false;
  // Check that the shuffle mask can be broken evenly between the
  // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;
  // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
  unsigned NumConcat = DstNumElts / SrcNumElts;
  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;
    // Ensure the indices in each SrcType sized piece are sequential and that
    // the same source is used for the whole piece.
    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
        (ConcatSrcs[i / SrcNumElts] >= 0 &&
         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
      return false;
    // Remember which source this index came from.
    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
  }
  // The shuffle is concatenating multiple vectors together.
  // Collect the different operands for that.
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (auto Src : ConcatSrcs) {
    if (Src < 0) {
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}

void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                               const ArrayRef<Register> Ops) {
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
  if (Ops.size() == 1)
    Builder.buildCopy(NewDstReg, Ops[0]);
  else
    Builder.buildMergeLikeInstr(NewDstReg, Ops);
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}
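
// For illustration (register names are arbitrary): a shuffle whose mask simply
// concatenates its inputs, e.g.
//   %d:_(<4 x s32>) = G_SHUFFLE_VECTOR %a(<2 x s32>), %b(<2 x s32>),
//                                      shufflemask(0, 1, 2, 3)
// rewrites to:
//   %d:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)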

namespace {
/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate are the attributes of the candidate under consideration.
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
                                  const LLT TyForCandidate,
                                  unsigned OpcodeForCandidate,
                                  MachineInstr *MIForCandidate) {
  if (!CurrentUse.Ty.isValid()) {
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
        CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }
  // We permit the extend to hoist through basic blocks but this is only
  // sensible if the target has extending loads. If you end up lowering back
  // into a load and extend during the legalizer then the end result is
  // hoisting the extend up to the load.
  // Prefer defined extensions to undefined extensions as these are more
  // likely to reduce the number of instructions.
  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
           OpcodeForCandidate != TargetOpcode::G_ANYEXT)
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive.
  if (CurrentUse.Ty == TyForCandidate) {
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have a reduced number of larger registers than smaller
  // registers and this choice potentially increases the live-range for the
  // larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}
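
// For illustration of the preference order above: a G_ANYEXT candidate never
// displaces an already-chosen defined extend; when the candidate and the
// current use have the same type, G_SEXT is preferred over G_ZEXT; otherwise
// the candidate with the wider result type wins, since the G_TRUNCs emitted
// for the remaining uses are usually free.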

/// Find a suitable place to insert some instructions and insert them. This
/// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHI's is to duplicate the
/// instructions for each predecessor. However, while that's ok for G_TRUNC
/// on most targets since it generally requires no code, other targets/cases may
/// want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();
  MachineBasicBlock *InsertBB = UseMI.getParent();
  // If the use is a PHI then we want the predecessor block instead.
  if (UseMI.isPHI()) {
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }
  // If the block is the same block as the def then we want to insert just after
  // the def instead of at the start of the block.
  if (InsertBB == DefMI.getParent()) {
    MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt), UseMO);
    return;
  }
  // Otherwise we want the start of the BB.
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace

bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  PreferredTuple Preferred;
  if (matchCombineExtendingLoads(MI, Preferred)) {
    applyCombineExtendingLoads(MI, Preferred);
    return true;
  }
  return false;
}

static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
  unsigned CandidateLoadOpc;
  switch (ExtOpc) {
  case TargetOpcode::G_ANYEXT:
    CandidateLoadOpc = TargetOpcode::G_LOAD;
    break;
  case TargetOpcode::G_SEXT:
    CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXT:
    CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
    break;
  default:
    llvm_unreachable("Unexpected extend opc");
  }
  return CandidateLoadOpc;
}

bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load for the volatile case or just
  // for performance.
  GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
  if (!LoadMI)
    return false;
  Register LoadReg = LoadMI->getDstReg();
  LLT LoadValueTy = MRI.getType(LoadReg);
  if (!LoadValueTy.isScalar())
    return false;
  // Most architectures are going to legalize sub-byte (<s8) loads into at least
  // a 1 byte load, and the MMOs can only describe memory accesses in multiples
  // of bytes. If we try to perform extload combining on those, we can end up
  // with
  //   %a(s8) = extload %ptr (load 1 byte from %ptr)
  // ... which is an illegal extload instruction.
  if (LoadValueTy.getSizeInBits() < 8)
    return false;
  // For non-power-of-2 types, they will very likely be legalized into multiple
  // loads. Don't bother trying to match them into extending loads.
  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
    return false;
  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode =
      isa<GLoad>(&MI)
          ? TargetOpcode::G_ANYEXT
          : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Preferred = {LLT(), PreferredOpcode, nullptr};
  for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
      const auto &MMO = LoadMI->getMMO();
      // For atomics, only form anyextending loads.
      if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
        continue;
      // Check for legality.
      if (!isPreLegalize()) {
        LegalityQuery::MemDesc MMDesc(MMO);
        unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
        LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
        LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
        if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
                .Action != LegalizeActions::Legal)
          continue;
      }
      Preferred = ChoosePreferredUse(Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }
  // There were no extends.
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}
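
// For illustration (register names are arbitrary): given
//   %ld:_(s8) = G_LOAD %ptr(p0) :: (load (s8))
//   %ext:_(s32) = G_SEXT %ld(s8)
// the match records Preferred = {s32, G_SEXT, %ext}, assuming an s32
// G_SEXTLOAD is legal (or we are pre-legalizer), and the apply step below
// rewrites the load into a G_SEXTLOAD that produces the s32 value directly.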

void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // Rewrite the load to the chosen extending load.
  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
  // Inserter to insert a truncate back to the original type at a given point
  // with some basic CSE to limit truncate duplication to one per BB.
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
                           MachineBasicBlock::iterator InsertBefore,
                           MachineOperand &UseMO) {
    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
    if (PreviouslyEmitted) {
      Observer.changingInstr(*UseMO.getParent());
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }
    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
    EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };
  Observer.changingInstr(MI);
  unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
  MI.setDesc(Builder.getTII().get(LoadOpc));
  // Rewrite all the uses to fix up the types.
  auto &LoadValue = MI.getOperand(0);
  SmallVector<MachineOperand *, 4> Uses;
  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
    Uses.push_back(&UseMO);
  for (auto *UseMO : Uses) {
    MachineInstr *UseMI = UseMO->getParent();
    // If the extend is compatible with the preferred extend then we should fix
    // up the type and extend so that it uses the preferred use.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      Register UseDstReg = UseMI->getOperand(0).getReg();
      MachineOperand &UseSrcMO = UseMI->getOperand(1);
      const LLT UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ANYEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    ... = ... %2(s32)
          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
          Observer.erasingInstr(*UseMO->getParent());
          UseMO->getParent()->eraseFromParent();
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s64) = G_ANYEXT %1(s8)
          //    ... = ... %3(s64)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    %3:_(s64) = G_ANYEXT %2:_(s32)
          //    ... = ... %3(s64)
          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          //    ... = ... %3(s32)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the old extend.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }
    // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }
  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}

bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
                                                 BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  // If we have the following code:
  //   %mask = G_CONSTANT 255
  //   %ld   = G_LOAD %ptr, (load s16)
  //   %and  = G_AND %ld, %mask
  //
  // Try to fold it into
  //   %ld = G_ZEXTLOAD %ptr, (load s8)
  Register Dst = MI.getOperand(0).getReg();
  if (MRI.getType(Dst).isVector())
    return false;
  auto MaybeMask =
      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeMask)
    return false;
  APInt MaskVal = MaybeMask->Value;
  if (!MaskVal.isMask())
    return false;
  Register SrcReg = MI.getOperand(1).getReg();
  // Don't use getOpcodeDef() here since intermediate instructions may have
  // multiple users.
  GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
    return false;
  Register LoadReg = LoadMI->getDstReg();
  LLT RegTy = MRI.getType(LoadReg);
  Register PtrReg = LoadMI->getPointerReg();
  unsigned RegSize = RegTy.getSizeInBits();
  uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
  unsigned MaskSizeBits = MaskVal.countTrailingOnes();
  // The mask may not be larger than the in-memory type, as it might cover sign
  // extended bits.
  if (MaskSizeBits > LoadSizeBits)
    return false;
  // If the mask covers the whole destination register, there's nothing to
  // extend.
  if (MaskSizeBits >= RegSize)
    return false;
  // Most targets cannot deal with loads of size < 8 and need to re-legalize to
  // at least byte loads. Avoid creating such loads here.
  if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
    return false;
  const MachineMemOperand &MMO = LoadMI->getMMO();
  LegalityQuery::MemDesc MemDesc(MMO);
  // Don't modify the memory access size if this is atomic/volatile, but we can
  // still adjust the opcode to indicate the high bit behavior.
  if (LoadMI->isSimple())
    MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
  else if (LoadSizeBits > MaskSizeBits || LoadSizeBits == RegSize)
    return false;
  // TODO: Could check if it's legal with the reduced or original memory size.
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
    return false;
  MatchInfo = [=](MachineIRBuilder &B) {
    B.setInstrAndDebugLoc(*LoadMI);
    auto &MF = B.getMF();
    auto PtrInfo = MMO.getPointerInfo();
    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
    B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
    LoadMI->eraseFromParent();
  };
  return true;
}
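
// For illustration (register names are arbitrary): the combine is rejected
// when the mask is wider than the memory access, e.g.
//   %mask:_(s32) = G_CONSTANT i32 65535
//   %ld:_(s32) = G_LOAD %ptr(p0) :: (load (s8))
//   %and:_(s32) = G_AND %ld, %mask
// because the upper mask bits cover bits that the 8-bit access may have
// extended.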

bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
                                   const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return true;
  const MachineBasicBlock &MBB = *DefMI.getParent();
  auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
    return &MI == &DefMI || &MI == &UseMI;
  });
  if (DefOrUse == MBB.end())
    llvm_unreachable("Block must contain both DefMI and UseMI!");
  return &*DefOrUse == &DefMI;
}

bool CombinerHelper::dominates(const MachineInstr &DefMI,
                               const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    return false;
  return isPredecessor(DefMI, UseMI);
}
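
// For illustration: without a MachineDominatorTree, dominance is only decided
// within a single block, e.g. for
//   %a:_(s32) = G_CONSTANT i32 1
//   %b:_(s32) = G_ADD %a, %a
// dominates() on the two instructions falls back to isPredecessor() and
// returns true because the def of %a appears first in the block.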

bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register SrcReg = MI.getOperand(1).getReg();
  Register LoadUser = SrcReg;
  if (MRI.getType(SrcReg).isVector())
    return false;
  Register TruncSrc;
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
    LoadUser = TruncSrc;
  uint64_t SizeInBits = MI.getOperand(2).getImm();
  // If the source is a G_SEXTLOAD from the same bit width, then we don't
  // need any extend at all, just a truncate.
  if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
    // If truncating more than the original extended value, abort.
    auto LoadSizeBits = LoadMI->getMemSizeInBits();
    if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
      return false;
    if (LoadSizeBits == SizeInBits)
      return true;
  }
  return false;
}

void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Builder.setInstrAndDebugLoc(MI);
  Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
}
  741. bool CombinerHelper::matchSextInRegOfLoad(
  742. MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  743. assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  744. Register DstReg = MI.getOperand(0).getReg();
  745. LLT RegTy = MRI.getType(DstReg);
  746. // Only supports scalars for now.
  747. if (RegTy.isVector())
  748. return false;
  749. Register SrcReg = MI.getOperand(1).getReg();
  750. auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
  751. if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
  752. return false;
  753. uint64_t MemBits = LoadDef->getMemSizeInBits();
  754. // If the sign extend extends from a narrower width than the load's width,
  755. // then we can narrow the load width when we combine to a G_SEXTLOAD.
  756. // Avoid widening the load at all.
  757. unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
  758. // Don't generate G_SEXTLOADs with a < 1 byte width.
  759. if (NewSizeBits < 8)
  760. return false;
762. // Don't bother creating a non-power-of-2 sextload; it will likely be broken
763. // up anyway for most targets.
  763. if (!isPowerOf2_32(NewSizeBits))
  764. return false;
  765. const MachineMemOperand &MMO = LoadDef->getMMO();
  766. LegalityQuery::MemDesc MMDesc(MMO);
  767. // Don't modify the memory access size if this is atomic/volatile, but we can
  768. // still adjust the opcode to indicate the high bit behavior.
  769. if (LoadDef->isSimple())
  770. MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
  771. else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
  772. return false;
  773. // TODO: Could check if it's legal with the reduced or original memory size.
  774. if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
  775. {MRI.getType(LoadDef->getDstReg()),
  776. MRI.getType(LoadDef->getPointerReg())},
  777. {MMDesc}}))
  778. return false;
  779. MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
  780. return true;
  781. }
  782. void CombinerHelper::applySextInRegOfLoad(
  783. MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  784. assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  785. Register LoadReg;
  786. unsigned ScalarSizeBits;
  787. std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
  788. GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
  789. // If we have the following:
  790. // %ld = G_LOAD %ptr, (load 2)
  791. // %ext = G_SEXT_INREG %ld, 8
  792. // ==>
  793. // %ld = G_SEXTLOAD %ptr (load 1)
  794. auto &MMO = LoadDef->getMMO();
  795. Builder.setInstrAndDebugLoc(*LoadDef);
  796. auto &MF = Builder.getMF();
  797. auto PtrInfo = MMO.getPointerInfo();
  798. auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
  799. Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
  800. LoadDef->getPointerReg(), *NewMMO);
  801. MI.eraseFromParent();
  802. }
  803. bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
  804. Register &Base, Register &Offset) {
  805. auto &MF = *MI.getParent()->getParent();
  806. const auto &TLI = *MF.getSubtarget().getTargetLowering();
  807. #ifndef NDEBUG
  808. unsigned Opcode = MI.getOpcode();
  809. assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
  810. Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
  811. #endif
  812. Base = MI.getOperand(1).getReg();
  813. MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
  814. if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
  815. return false;
  816. LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
817. // FIXME: The following use traversal needs a bail out for pathological cases.
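// Look for a G_PTR_ADD user of the base that computes base + offset, e.g.
// (illustrative):
//   G_STORE %val, %base
//   %addr:_(p0) = G_PTR_ADD %base, %offset
// which could be folded into a post-indexed store that also produces %addr.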
  818. for (auto &Use : MRI.use_nodbg_instructions(Base)) {
  819. if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
  820. continue;
  821. Offset = Use.getOperand(2).getReg();
  822. if (!ForceLegalIndexing &&
  823. !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
  824. LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
  825. << Use);
  826. continue;
  827. }
  828. // Make sure the offset calculation is before the potentially indexed op.
  829. // FIXME: we really care about dependency here. The offset calculation might
  830. // be movable.
  831. MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
  832. if (!OffsetDef || !dominates(*OffsetDef, MI)) {
  833. LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
  834. << Use);
  835. continue;
  836. }
  837. // FIXME: check whether all uses of Base are load/store with foldable
  838. // addressing modes. If so, using the normal addr-modes is better than
  839. // forming an indexed one.
  840. bool MemOpDominatesAddrUses = true;
  841. for (auto &PtrAddUse :
  842. MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
  843. if (!dominates(MI, PtrAddUse)) {
  844. MemOpDominatesAddrUses = false;
  845. break;
  846. }
  847. }
  848. if (!MemOpDominatesAddrUses) {
  849. LLVM_DEBUG(
  850. dbgs() << " Ignoring candidate as memop does not dominate uses: "
  851. << Use);
  852. continue;
  853. }
  854. LLVM_DEBUG(dbgs() << " Found match: " << Use);
  855. Addr = Use.getOperand(0).getReg();
  856. return true;
  857. }
  858. return false;
  859. }
  860. bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
  861. Register &Base, Register &Offset) {
  862. auto &MF = *MI.getParent()->getParent();
  863. const auto &TLI = *MF.getSubtarget().getTargetLowering();
  864. #ifndef NDEBUG
  865. unsigned Opcode = MI.getOpcode();
  866. assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
  867. Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
  868. #endif
  869. Addr = MI.getOperand(1).getReg();
  870. MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
  871. if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
  872. return false;
  873. Base = AddrDef->getOperand(1).getReg();
  874. Offset = AddrDef->getOperand(2).getReg();
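// Illustrative shape of a pre-indexed candidate:
//   %addr:_(p0) = G_PTR_ADD %base, %offset
//   G_STORE %val, %addr
// i.e. the memory op already addresses base + offset, and %addr itself would
// be the written-back value of an indexed op.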
  875. LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
  876. if (!ForceLegalIndexing &&
  877. !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
  878. LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
  879. return false;
  880. }
  881. MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
  882. if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
  883. LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
  884. return false;
  885. }
  886. if (MI.getOpcode() == TargetOpcode::G_STORE) {
  887. // Would require a copy.
  888. if (Base == MI.getOperand(0).getReg()) {
  889. LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
  890. return false;
  891. }
  892. // We're expecting one use of Addr in MI, but it could also be the
  893. // value stored, which isn't actually dominated by the instruction.
  894. if (MI.getOperand(0).getReg() == Addr) {
  895. LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
  896. return false;
  897. }
  898. }
  899. // FIXME: check whether all uses of the base pointer are constant PtrAdds.
  900. // That might allow us to end base's liveness here by adjusting the constant.
  901. for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
  902. if (!dominates(MI, UseMI)) {
  903. LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
  904. return false;
  905. }
  906. }
  907. return true;
  908. }
  909. bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
  910. IndexedLoadStoreMatchInfo MatchInfo;
  911. if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
  912. applyCombineIndexedLoadStore(MI, MatchInfo);
  913. return true;
  914. }
  915. return false;
  916. }
917. bool CombinerHelper::matchCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  918. unsigned Opcode = MI.getOpcode();
  919. if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
  920. Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
  921. return false;
  922. // For now, no targets actually support these opcodes so don't waste time
  923. // running these unless we're forced to for testing.
  924. if (!ForceLegalIndexing)
  925. return false;
  926. MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
  927. MatchInfo.Offset);
  928. if (!MatchInfo.IsPre &&
  929. !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
  930. MatchInfo.Offset))
  931. return false;
  932. return true;
  933. }
  934. void CombinerHelper::applyCombineIndexedLoadStore(
  935. MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  936. MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
  937. MachineIRBuilder MIRBuilder(MI);
  938. unsigned Opcode = MI.getOpcode();
  939. bool IsStore = Opcode == TargetOpcode::G_STORE;
  940. unsigned NewOpcode;
  941. switch (Opcode) {
  942. case TargetOpcode::G_LOAD:
  943. NewOpcode = TargetOpcode::G_INDEXED_LOAD;
  944. break;
  945. case TargetOpcode::G_SEXTLOAD:
  946. NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
  947. break;
  948. case TargetOpcode::G_ZEXTLOAD:
  949. NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
  950. break;
  951. case TargetOpcode::G_STORE:
  952. NewOpcode = TargetOpcode::G_INDEXED_STORE;
  953. break;
  954. default:
  955. llvm_unreachable("Unknown load/store opcode");
  956. }
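// Note: the indexed opcodes also define the written-back address. For a load
// this looks like (illustrative):
//   %val, %writeback = G_INDEXED_LOAD %base, %offset, IsPre
// while for a store the writeback address is the only def.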
  957. auto MIB = MIRBuilder.buildInstr(NewOpcode);
  958. if (IsStore) {
  959. MIB.addDef(MatchInfo.Addr);
  960. MIB.addUse(MI.getOperand(0).getReg());
  961. } else {
  962. MIB.addDef(MI.getOperand(0).getReg());
  963. MIB.addDef(MatchInfo.Addr);
  964. }
  965. MIB.addUse(MatchInfo.Base);
  966. MIB.addUse(MatchInfo.Offset);
  967. MIB.addImm(MatchInfo.IsPre);
  968. MI.eraseFromParent();
  969. AddrDef.eraseFromParent();
970. LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
  971. }
  972. bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
  973. MachineInstr *&OtherMI) {
  974. unsigned Opcode = MI.getOpcode();
  975. bool IsDiv, IsSigned;
  976. switch (Opcode) {
  977. default:
  978. llvm_unreachable("Unexpected opcode!");
  979. case TargetOpcode::G_SDIV:
  980. case TargetOpcode::G_UDIV: {
  981. IsDiv = true;
  982. IsSigned = Opcode == TargetOpcode::G_SDIV;
  983. break;
  984. }
  985. case TargetOpcode::G_SREM:
  986. case TargetOpcode::G_UREM: {
  987. IsDiv = false;
  988. IsSigned = Opcode == TargetOpcode::G_SREM;
  989. break;
  990. }
  991. }
  992. Register Src1 = MI.getOperand(1).getReg();
  993. unsigned DivOpcode, RemOpcode, DivremOpcode;
  994. if (IsSigned) {
  995. DivOpcode = TargetOpcode::G_SDIV;
  996. RemOpcode = TargetOpcode::G_SREM;
  997. DivremOpcode = TargetOpcode::G_SDIVREM;
  998. } else {
  999. DivOpcode = TargetOpcode::G_UDIV;
  1000. RemOpcode = TargetOpcode::G_UREM;
  1001. DivremOpcode = TargetOpcode::G_UDIVREM;
  1002. }
  1003. if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
  1004. return false;
  1005. // Combine:
  1006. // %div:_ = G_[SU]DIV %src1:_, %src2:_
  1007. // %rem:_ = G_[SU]REM %src1:_, %src2:_
  1008. // into:
  1009. // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
  1010. // Combine:
  1011. // %rem:_ = G_[SU]REM %src1:_, %src2:_
  1012. // %div:_ = G_[SU]DIV %src1:_, %src2:_
  1013. // into:
  1014. // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
  1015. for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
  1016. if (MI.getParent() == UseMI.getParent() &&
  1017. ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
  1018. (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
  1019. matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
  1020. matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
  1021. OtherMI = &UseMI;
  1022. return true;
  1023. }
  1024. }
  1025. return false;
  1026. }
  1027. void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
  1028. MachineInstr *&OtherMI) {
  1029. unsigned Opcode = MI.getOpcode();
  1030. assert(OtherMI && "OtherMI shouldn't be empty.");
  1031. Register DestDivReg, DestRemReg;
  1032. if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
  1033. DestDivReg = MI.getOperand(0).getReg();
  1034. DestRemReg = OtherMI->getOperand(0).getReg();
  1035. } else {
  1036. DestDivReg = OtherMI->getOperand(0).getReg();
  1037. DestRemReg = MI.getOperand(0).getReg();
  1038. }
  1039. bool IsSigned =
  1040. Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
  1041. // Check which instruction is first in the block so we don't break def-use
  1042. // deps by "moving" the instruction incorrectly.
  1043. if (dominates(MI, *OtherMI))
  1044. Builder.setInstrAndDebugLoc(MI);
  1045. else
  1046. Builder.setInstrAndDebugLoc(*OtherMI);
  1047. Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
  1048. : TargetOpcode::G_UDIVREM,
  1049. {DestDivReg, DestRemReg},
  1050. {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
  1051. MI.eraseFromParent();
  1052. OtherMI->eraseFromParent();
  1053. }
  1054. bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
  1055. MachineInstr *&BrCond) {
  1056. assert(MI.getOpcode() == TargetOpcode::G_BR);
  1057. // Try to match the following:
  1058. // bb1:
  1059. // G_BRCOND %c1, %bb2
  1060. // G_BR %bb3
  1061. // bb2:
  1062. // ...
  1063. // bb3:
1064. // The above pattern does not have a fall-through to the successor bb2, so it
1065. // always branches no matter which path is taken. Here we try to find and
1066. // replace that pattern with a conditional branch to bb3 that otherwise falls
1067. // through to bb2. This is generally better for branch predictors.
  1068. MachineBasicBlock *MBB = MI.getParent();
  1069. MachineBasicBlock::iterator BrIt(MI);
  1070. if (BrIt == MBB->begin())
  1071. return false;
  1072. assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
  1073. BrCond = &*std::prev(BrIt);
  1074. if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
  1075. return false;
  1076. // Check that the next block is the conditional branch target. Also make sure
  1077. // that it isn't the same as the G_BR's target (otherwise, this will loop.)
  1078. MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
  1079. return BrCondTarget != MI.getOperand(0).getMBB() &&
  1080. MBB->isLayoutSuccessor(BrCondTarget);
  1081. }
  1082. void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
  1083. MachineInstr *&BrCond) {
  1084. MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  1085. Builder.setInstrAndDebugLoc(*BrCond);
  1086. LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
  1087. // FIXME: Does int/fp matter for this? If so, we might need to restrict
  1088. // this to i1 only since we might not know for sure what kind of
  1089. // compare generated the condition value.
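// Invert the condition by XOR'ing it with the "true" value for this type;
// for an i1 condition this amounts to flipping the single bit.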
  1090. auto True = Builder.buildConstant(
  1091. Ty, getICmpTrueVal(getTargetLowering(), false, false));
  1092. auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
  1093. auto *FallthroughBB = BrCond->getOperand(1).getMBB();
  1094. Observer.changingInstr(MI);
  1095. MI.getOperand(0).setMBB(FallthroughBB);
  1096. Observer.changedInstr(MI);
  1097. // Change the conditional branch to use the inverted condition and
  1098. // new target block.
  1099. Observer.changingInstr(*BrCond);
  1100. BrCond->getOperand(0).setReg(Xor.getReg(0));
  1101. BrCond->getOperand(1).setMBB(BrTarget);
  1102. Observer.changedInstr(*BrCond);
  1103. }
  1104. static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  1105. if (Ty.isVector())
  1106. return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
  1107. Ty.getNumElements());
  1108. return IntegerType::get(C, Ty.getSizeInBits());
  1109. }
  1110. bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
  1111. MachineIRBuilder HelperBuilder(MI);
  1112. GISelObserverWrapper DummyObserver;
  1113. LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  1114. return Helper.lowerMemcpyInline(MI) ==
  1115. LegalizerHelper::LegalizeResult::Legalized;
  1116. }
  1117. bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  1118. MachineIRBuilder HelperBuilder(MI);
  1119. GISelObserverWrapper DummyObserver;
  1120. LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  1121. return Helper.lowerMemCpyFamily(MI, MaxLen) ==
  1122. LegalizerHelper::LegalizeResult::Legalized;
  1123. }
  1124. static std::optional<APFloat>
  1125. constantFoldFpUnary(unsigned Opcode, LLT DstTy, const Register Op,
  1126. const MachineRegisterInfo &MRI) {
  1127. const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
  1128. if (!MaybeCst)
  1129. return std::nullopt;
  1130. APFloat V = MaybeCst->getValueAPF();
  1131. switch (Opcode) {
  1132. default:
  1133. llvm_unreachable("Unexpected opcode!");
  1134. case TargetOpcode::G_FNEG: {
  1135. V.changeSign();
  1136. return V;
  1137. }
  1138. case TargetOpcode::G_FABS: {
  1139. V.clearSign();
  1140. return V;
  1141. }
  1142. case TargetOpcode::G_FPTRUNC:
  1143. break;
  1144. case TargetOpcode::G_FSQRT: {
  1145. bool Unused;
  1146. V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
  1147. V = APFloat(sqrt(V.convertToDouble()));
  1148. break;
  1149. }
  1150. case TargetOpcode::G_FLOG2: {
  1151. bool Unused;
  1152. V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
  1153. V = APFloat(log2(V.convertToDouble()));
  1154. break;
  1155. }
  1156. }
  1157. // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
  1158. // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
  1159. // and `G_FLOG2` reach here.
  1160. bool Unused;
  1161. V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
  1162. return V;
  1163. }
  1164. bool CombinerHelper::matchCombineConstantFoldFpUnary(
  1165. MachineInstr &MI, std::optional<APFloat> &Cst) {
  1166. Register DstReg = MI.getOperand(0).getReg();
  1167. Register SrcReg = MI.getOperand(1).getReg();
  1168. LLT DstTy = MRI.getType(DstReg);
  1169. Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
  1170. return Cst.has_value();
  1171. }
  1172. void CombinerHelper::applyCombineConstantFoldFpUnary(
  1173. MachineInstr &MI, std::optional<APFloat> &Cst) {
  1174. assert(Cst && "Optional is unexpectedly empty!");
  1175. Builder.setInstrAndDebugLoc(MI);
  1176. MachineFunction &MF = Builder.getMF();
  1177. auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
  1178. Register DstReg = MI.getOperand(0).getReg();
  1179. Builder.buildFConstant(DstReg, *FPVal);
  1180. MI.eraseFromParent();
  1181. }
  1182. bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
  1183. PtrAddChain &MatchInfo) {
  1184. // We're trying to match the following pattern:
  1185. // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
  1186. // %root = G_PTR_ADD %t1, G_CONSTANT imm2
  1187. // -->
  1188. // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
  1189. if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
  1190. return false;
  1191. Register Add2 = MI.getOperand(1).getReg();
  1192. Register Imm1 = MI.getOperand(2).getReg();
  1193. auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
  1194. if (!MaybeImmVal)
  1195. return false;
  1196. MachineInstr *Add2Def = MRI.getVRegDef(Add2);
  1197. if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
  1198. return false;
  1199. Register Base = Add2Def->getOperand(1).getReg();
  1200. Register Imm2 = Add2Def->getOperand(2).getReg();
  1201. auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
  1202. if (!MaybeImm2Val)
  1203. return false;
  1204. // Check if the new combined immediate forms an illegal addressing mode.
1205. // Do not combine if it was legal before but would become illegal.
  1206. // To do so, we need to find a load/store user of the pointer to get
  1207. // the access type.
  1208. Type *AccessTy = nullptr;
  1209. auto &MF = *MI.getMF();
  1210. for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
  1211. if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
  1212. AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
  1213. MF.getFunction().getContext());
  1214. break;
  1215. }
  1216. }
  1217. TargetLoweringBase::AddrMode AMNew;
  1218. APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
  1219. AMNew.BaseOffs = CombinedImm.getSExtValue();
  1220. if (AccessTy) {
  1221. AMNew.HasBaseReg = true;
  1222. TargetLoweringBase::AddrMode AMOld;
  1223. AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
  1224. AMOld.HasBaseReg = true;
  1225. unsigned AS = MRI.getType(Add2).getAddressSpace();
  1226. const auto &TLI = *MF.getSubtarget().getTargetLowering();
  1227. if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
  1228. !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
  1229. return false;
  1230. }
  1231. // Pass the combined immediate to the apply function.
  1232. MatchInfo.Imm = AMNew.BaseOffs;
  1233. MatchInfo.Base = Base;
  1234. MatchInfo.Bank = getRegBank(Imm2);
  1235. return true;
  1236. }
  1237. void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
  1238. PtrAddChain &MatchInfo) {
  1239. assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  1240. MachineIRBuilder MIB(MI);
  1241. LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
  1242. auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
  1243. setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
  1244. Observer.changingInstr(MI);
  1245. MI.getOperand(1).setReg(MatchInfo.Base);
  1246. MI.getOperand(2).setReg(NewOffset.getReg(0));
  1247. Observer.changedInstr(MI);
  1248. }
  1249. bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
  1250. RegisterImmPair &MatchInfo) {
  1251. // We're trying to match the following pattern with any of
  1252. // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
  1253. // %t1 = SHIFT %base, G_CONSTANT imm1
  1254. // %root = SHIFT %t1, G_CONSTANT imm2
  1255. // -->
  1256. // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
  1257. unsigned Opcode = MI.getOpcode();
  1258. assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
  1259. Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
  1260. Opcode == TargetOpcode::G_USHLSAT) &&
  1261. "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
  1262. Register Shl2 = MI.getOperand(1).getReg();
  1263. Register Imm1 = MI.getOperand(2).getReg();
  1264. auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
  1265. if (!MaybeImmVal)
  1266. return false;
  1267. MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
  1268. if (Shl2Def->getOpcode() != Opcode)
  1269. return false;
  1270. Register Base = Shl2Def->getOperand(1).getReg();
  1271. Register Imm2 = Shl2Def->getOperand(2).getReg();
  1272. auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
  1273. if (!MaybeImm2Val)
  1274. return false;
  1275. // Pass the combined immediate to the apply function.
  1276. MatchInfo.Imm =
  1277. (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
  1278. MatchInfo.Reg = Base;
  1279. // There is no simple replacement for a saturating unsigned left shift that
  1280. // exceeds the scalar size.
  1281. if (Opcode == TargetOpcode::G_USHLSAT &&
  1282. MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
  1283. return false;
  1284. return true;
  1285. }
  1286. void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
  1287. RegisterImmPair &MatchInfo) {
  1288. unsigned Opcode = MI.getOpcode();
  1289. assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
  1290. Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
  1291. Opcode == TargetOpcode::G_USHLSAT) &&
  1292. "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
  1293. Builder.setInstrAndDebugLoc(MI);
  1294. LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  1295. unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
  1296. auto Imm = MatchInfo.Imm;
  1297. if (Imm >= ScalarSizeInBits) {
  1298. // Any logical shift that exceeds scalar size will produce zero.
  1299. if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
  1300. Builder.buildConstant(MI.getOperand(0), 0);
  1301. MI.eraseFromParent();
  1302. return;
  1303. }
  1304. // Arithmetic shift and saturating signed left shift have no effect beyond
  1305. // scalar size.
  1306. Imm = ScalarSizeInBits - 1;
  1307. }
  1308. LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
  1309. Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
  1310. Observer.changingInstr(MI);
  1311. MI.getOperand(1).setReg(MatchInfo.Reg);
  1312. MI.getOperand(2).setReg(NewImm);
  1313. Observer.changedInstr(MI);
  1314. }
  1315. bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
  1316. ShiftOfShiftedLogic &MatchInfo) {
  1317. // We're trying to match the following pattern with any of
  1318. // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
  1319. // with any of G_AND/G_OR/G_XOR logic instructions.
  1320. // %t1 = SHIFT %X, G_CONSTANT C0
  1321. // %t2 = LOGIC %t1, %Y
  1322. // %root = SHIFT %t2, G_CONSTANT C1
  1323. // -->
  1324. // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
  1325. // %t4 = SHIFT %Y, G_CONSTANT C1
  1326. // %root = LOGIC %t3, %t4
  1327. unsigned ShiftOpcode = MI.getOpcode();
  1328. assert((ShiftOpcode == TargetOpcode::G_SHL ||
  1329. ShiftOpcode == TargetOpcode::G_ASHR ||
  1330. ShiftOpcode == TargetOpcode::G_LSHR ||
  1331. ShiftOpcode == TargetOpcode::G_USHLSAT ||
  1332. ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
  1333. "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
  1334. // Match a one-use bitwise logic op.
  1335. Register LogicDest = MI.getOperand(1).getReg();
  1336. if (!MRI.hasOneNonDBGUse(LogicDest))
  1337. return false;
  1338. MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
  1339. unsigned LogicOpcode = LogicMI->getOpcode();
  1340. if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
  1341. LogicOpcode != TargetOpcode::G_XOR)
  1342. return false;
  1343. // Find a matching one-use shift by constant.
  1344. const Register C1 = MI.getOperand(2).getReg();
  1345. auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
  1346. if (!MaybeImmVal)
  1347. return false;
  1348. const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
  1349. auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1350. // The shift should match the previous one and have only one non-debug use.
  1351. if (MI->getOpcode() != ShiftOpcode ||
  1352. !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
  1353. return false;
  1354. // Must be a constant.
  1355. auto MaybeImmVal =
  1356. getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
  1357. if (!MaybeImmVal)
  1358. return false;
  1359. ShiftVal = MaybeImmVal->Value.getSExtValue();
  1360. return true;
  1361. };
  1362. // Logic ops are commutative, so check each operand for a match.
  1363. Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
  1364. MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
  1365. Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
  1366. MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
  1367. uint64_t C0Val;
  1368. if (matchFirstShift(LogicMIOp1, C0Val)) {
  1369. MatchInfo.LogicNonShiftReg = LogicMIReg2;
  1370. MatchInfo.Shift2 = LogicMIOp1;
  1371. } else if (matchFirstShift(LogicMIOp2, C0Val)) {
  1372. MatchInfo.LogicNonShiftReg = LogicMIReg1;
  1373. MatchInfo.Shift2 = LogicMIOp2;
  1374. } else
  1375. return false;
  1376. MatchInfo.ValSum = C0Val + C1Val;
  1377. // The fold is not valid if the sum of the shift values exceeds bitwidth.
  1378. if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
  1379. return false;
  1380. MatchInfo.Logic = LogicMI;
  1381. return true;
  1382. }
  1383. void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
  1384. ShiftOfShiftedLogic &MatchInfo) {
  1385. unsigned Opcode = MI.getOpcode();
  1386. assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
  1387. Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
  1388. Opcode == TargetOpcode::G_SSHLSAT) &&
  1389. "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
  1390. LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
  1391. LLT DestType = MRI.getType(MI.getOperand(0).getReg());
  1392. Builder.setInstrAndDebugLoc(MI);
  1393. Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
  1394. Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
  1395. Register Shift1 =
  1396. Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1397. // If LogicNonShiftReg is the same as Shift1Base and the shift1 constant equals
1398. // the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old shift1 when
1399. // building shift2. In that case, erasing MatchInfo.Shift2 at the end would
1400. // actually remove the reused shift1 and cause a crash later on, so erase it
1401. // earlier to avoid the crash.
  1402. MatchInfo.Shift2->eraseFromParent();
  1403. Register Shift2Const = MI.getOperand(2).getReg();
  1404. Register Shift2 = Builder
  1405. .buildInstr(Opcode, {DestType},
  1406. {MatchInfo.LogicNonShiftReg, Shift2Const})
  1407. .getReg(0);
  1408. Register Dest = MI.getOperand(0).getReg();
  1409. Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
  1410. // This was one use so it's safe to remove it.
  1411. MatchInfo.Logic->eraseFromParent();
  1412. MI.eraseFromParent();
  1413. }
  1414. bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
  1415. unsigned &ShiftVal) {
  1416. assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
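// Fold a multiplication by a power of two into a shift, e.g. (illustrative):
//   %r:_(s32) = G_MUL %x, 8   ==>   %r:_(s32) = G_SHL %x, 3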
  1417. auto MaybeImmVal =
  1418. getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  1419. if (!MaybeImmVal)
  1420. return false;
  1421. ShiftVal = MaybeImmVal->Value.exactLogBase2();
  1422. return (static_cast<int32_t>(ShiftVal) != -1);
  1423. }
  1424. void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
  1425. unsigned &ShiftVal) {
  1426. assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  1427. MachineIRBuilder MIB(MI);
  1428. LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
  1429. auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
  1430. Observer.changingInstr(MI);
  1431. MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
  1432. MI.getOperand(2).setReg(ShiftCst.getReg(0));
  1433. Observer.changedInstr(MI);
  1434. }
  1435. // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
  1436. bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
  1437. RegisterImmPair &MatchData) {
  1438. assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
  1439. Register LHS = MI.getOperand(1).getReg();
  1440. Register ExtSrc;
  1441. if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
  1442. !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
  1443. !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
  1444. return false;
  1445. // TODO: Should handle vector splat.
  1446. Register RHS = MI.getOperand(2).getReg();
  1447. auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
  1448. if (!MaybeShiftAmtVal)
  1449. return false;
  1450. if (LI) {
  1451. LLT SrcTy = MRI.getType(ExtSrc);
  1452. // We only really care about the legality with the shifted value. We can
1453. // pick any type for the constant shift amount, so ask the target what to
  1454. // use. Otherwise we would have to guess and hope it is reported as legal.
  1455. LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
  1456. if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
  1457. return false;
  1458. }
  1459. int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
  1460. MatchData.Reg = ExtSrc;
  1461. MatchData.Imm = ShiftAmt;
  1462. unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
  1463. return MinLeadingZeros >= ShiftAmt;
  1464. }
  1465. void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
  1466. const RegisterImmPair &MatchData) {
  1467. Register ExtSrcReg = MatchData.Reg;
  1468. int64_t ShiftAmtVal = MatchData.Imm;
  1469. LLT ExtSrcTy = MRI.getType(ExtSrcReg);
  1470. Builder.setInstrAndDebugLoc(MI);
  1471. auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
  1472. auto NarrowShift =
  1473. Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
  1474. Builder.buildZExt(MI.getOperand(0), NarrowShift);
  1475. MI.eraseFromParent();
  1476. }
  1477. bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
  1478. Register &MatchInfo) {
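// Fold a merge of all results of an unmerge back into the unmerged value,
// e.g. (illustrative):
//   %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %x:_(s64)
//   %y:_(s64) = G_MERGE_VALUES %a, %b
// ==> replace %y with %x.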
  1479. GMerge &Merge = cast<GMerge>(MI);
  1480. SmallVector<Register, 16> MergedValues;
  1481. for (unsigned I = 0; I < Merge.getNumSources(); ++I)
  1482. MergedValues.emplace_back(Merge.getSourceReg(I));
  1483. auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
  1484. if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
  1485. return false;
  1486. for (unsigned I = 0; I < MergedValues.size(); ++I)
  1487. if (MergedValues[I] != Unmerge->getReg(I))
  1488. return false;
  1489. MatchInfo = Unmerge->getSourceReg();
  1490. return true;
  1491. }
  1492. static Register peekThroughBitcast(Register Reg,
  1493. const MachineRegisterInfo &MRI) {
  1494. while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
  1495. ;
  1496. return Reg;
  1497. }
  1498. bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
  1499. MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  1500. assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
  1501. "Expected an unmerge");
  1502. auto &Unmerge = cast<GUnmerge>(MI);
  1503. Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
  1504. auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
  1505. if (!SrcInstr)
  1506. return false;
  1507. // Check the source type of the merge.
  1508. LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
  1509. LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
  1510. bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
  1511. if (SrcMergeTy != Dst0Ty && !SameSize)
  1512. return false;
  1513. // They are the same now (modulo a bitcast).
  1514. // We can collect all the src registers.
  1515. for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
  1516. Operands.push_back(SrcInstr->getSourceReg(Idx));
  1517. return true;
  1518. }
  1519. void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
  1520. MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  1521. assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
  1522. "Expected an unmerge");
  1523. assert((MI.getNumOperands() - 1 == Operands.size()) &&
  1524. "Not enough operands to replace all defs");
  1525. unsigned NumElems = MI.getNumOperands() - 1;
  1526. LLT SrcTy = MRI.getType(Operands[0]);
  1527. LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  1528. bool CanReuseInputDirectly = DstTy == SrcTy;
  1529. Builder.setInstrAndDebugLoc(MI);
  1530. for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
  1531. Register DstReg = MI.getOperand(Idx).getReg();
  1532. Register SrcReg = Operands[Idx];
  1533. if (CanReuseInputDirectly)
  1534. replaceRegWith(MRI, DstReg, SrcReg);
  1535. else
  1536. Builder.buildCast(DstReg, SrcReg);
  1537. }
  1538. MI.eraseFromParent();
  1539. }
  1540. bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
  1541. SmallVectorImpl<APInt> &Csts) {
  1542. unsigned SrcIdx = MI.getNumOperands() - 1;
  1543. Register SrcReg = MI.getOperand(SrcIdx).getReg();
  1544. MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
  1545. if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
  1546. SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
  1547. return false;
1548. // Break down the big constant into smaller ones.
  1549. const MachineOperand &CstVal = SrcInstr->getOperand(1);
  1550. APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
  1551. ? CstVal.getCImm()->getValue()
  1552. : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
  1553. LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
  1554. unsigned ShiftAmt = Dst0Ty.getSizeInBits();
  1555. // Unmerge a constant.
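// E.g. (illustrative) unmerging the s64 constant 0x0000000100000002 into two
// s32 pieces yields 0x2 for the low destination and 0x1 for the high one.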
  1556. for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
  1557. Csts.emplace_back(Val.trunc(ShiftAmt));
  1558. Val = Val.lshr(ShiftAmt);
  1559. }
  1560. return true;
  1561. }
  1562. void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
  1563. SmallVectorImpl<APInt> &Csts) {
  1564. assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
  1565. "Expected an unmerge");
  1566. assert((MI.getNumOperands() - 1 == Csts.size()) &&
  1567. "Not enough operands to replace all defs");
  1568. unsigned NumElems = MI.getNumOperands() - 1;
  1569. Builder.setInstrAndDebugLoc(MI);
  1570. for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
  1571. Register DstReg = MI.getOperand(Idx).getReg();
  1572. Builder.buildConstant(DstReg, Csts[Idx]);
  1573. }
  1574. MI.eraseFromParent();
  1575. }
  1576. bool CombinerHelper::matchCombineUnmergeUndef(
  1577. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  1578. unsigned SrcIdx = MI.getNumOperands() - 1;
  1579. Register SrcReg = MI.getOperand(SrcIdx).getReg();
  1580. MatchInfo = [&MI](MachineIRBuilder &B) {
  1581. unsigned NumElems = MI.getNumOperands() - 1;
  1582. for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
  1583. Register DstReg = MI.getOperand(Idx).getReg();
  1584. B.buildUndef(DstReg);
  1585. }
  1586. };
  1587. return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
  1588. }
  1589. bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
  1590. assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
  1591. "Expected an unmerge");
  1592. // Check that all the lanes are dead except the first one.
  1593. for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
  1594. if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
  1595. return false;
  1596. }
  1597. return true;
  1598. }
  1599. void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
  1600. Builder.setInstrAndDebugLoc(MI);
  1601. Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  1602. // Truncating a vector is going to truncate every single lane,
1603. // whereas we want the full low bits.
  1604. // Do the operation on a scalar instead.
  1605. LLT SrcTy = MRI.getType(SrcReg);
  1606. if (SrcTy.isVector())
  1607. SrcReg =
  1608. Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
  1609. Register Dst0Reg = MI.getOperand(0).getReg();
  1610. LLT Dst0Ty = MRI.getType(Dst0Reg);
  1611. if (Dst0Ty.isVector()) {
  1612. auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
  1613. Builder.buildCast(Dst0Reg, MIB);
  1614. } else
  1615. Builder.buildTrunc(Dst0Reg, SrcReg);
  1616. MI.eraseFromParent();
  1617. }
  1618. bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
  1619. assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
  1620. "Expected an unmerge");
  1621. Register Dst0Reg = MI.getOperand(0).getReg();
  1622. LLT Dst0Ty = MRI.getType(Dst0Reg);
1623. // G_ZEXT on a vector applies to each lane, so it will
  1624. // affect all destinations. Therefore we won't be able
  1625. // to simplify the unmerge to just the first definition.
  1626. if (Dst0Ty.isVector())
  1627. return false;
  1628. Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  1629. LLT SrcTy = MRI.getType(SrcReg);
  1630. if (SrcTy.isVector())
  1631. return false;
  1632. Register ZExtSrcReg;
  1633. if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
  1634. return false;
  1635. // Finally we can replace the first definition with
  1636. // a zext of the source if the definition is big enough to hold
  1637. // all of ZExtSrc bits.
  1638. LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
  1639. return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
  1640. }
  1641. void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
  1642. assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
  1643. "Expected an unmerge");
  1644. Register Dst0Reg = MI.getOperand(0).getReg();
  1645. MachineInstr *ZExtInstr =
  1646. MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
  1647. assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
  1648. "Expecting a G_ZEXT");
  1649. Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
  1650. LLT Dst0Ty = MRI.getType(Dst0Reg);
  1651. LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
  1652. Builder.setInstrAndDebugLoc(MI);
  1653. if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
  1654. Builder.buildZExt(Dst0Reg, ZExtSrcReg);
  1655. } else {
  1656. assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
  1657. "ZExt src doesn't fit in destination");
  1658. replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
  1659. }
  1660. Register ZeroReg;
  1661. for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
  1662. if (!ZeroReg)
  1663. ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
  1664. replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
  1665. }
  1666. MI.eraseFromParent();
  1667. }
  1668. bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
  1669. unsigned TargetShiftSize,
  1670. unsigned &ShiftVal) {
  1671. assert((MI.getOpcode() == TargetOpcode::G_SHL ||
  1672. MI.getOpcode() == TargetOpcode::G_LSHR ||
  1673. MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
  1674. LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  1675. if (Ty.isVector()) // TODO:
  1676. return false;
  1677. // Don't narrow further than the requested size.
  1678. unsigned Size = Ty.getSizeInBits();
  1679. if (Size <= TargetShiftSize)
  1680. return false;
  1681. auto MaybeImmVal =
  1682. getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  1683. if (!MaybeImmVal)
  1684. return false;
  1685. ShiftVal = MaybeImmVal->Value.getSExtValue();
  1686. return ShiftVal >= Size / 2 && ShiftVal < Size;
  1687. }
  1688. void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
  1689. const unsigned &ShiftVal) {
  1690. Register DstReg = MI.getOperand(0).getReg();
  1691. Register SrcReg = MI.getOperand(1).getReg();
  1692. LLT Ty = MRI.getType(SrcReg);
  1693. unsigned Size = Ty.getSizeInBits();
  1694. unsigned HalfSize = Size / 2;
  1695. assert(ShiftVal >= HalfSize);
  1696. LLT HalfTy = LLT::scalar(HalfSize);
  1697. Builder.setInstr(MI);
  1698. auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
  1699. unsigned NarrowShiftAmt = ShiftVal - HalfSize;
  1700. if (MI.getOpcode() == TargetOpcode::G_LSHR) {
  1701. Register Narrowed = Unmerge.getReg(1);
  1702. // dst = G_LSHR s64:x, C for C >= 32
  1703. // =>
  1704. // lo, hi = G_UNMERGE_VALUES x
  1705. // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
  1706. if (NarrowShiftAmt != 0) {
  1707. Narrowed = Builder.buildLShr(HalfTy, Narrowed,
  1708. Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
  1709. }
  1710. auto Zero = Builder.buildConstant(HalfTy, 0);
  1711. Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
  1712. } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
  1713. Register Narrowed = Unmerge.getReg(0);
  1714. // dst = G_SHL s64:x, C for C >= 32
  1715. // =>
  1716. // lo, hi = G_UNMERGE_VALUES x
  1717. // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
  1718. if (NarrowShiftAmt != 0) {
  1719. Narrowed = Builder.buildShl(HalfTy, Narrowed,
  1720. Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
  1721. }
  1722. auto Zero = Builder.buildConstant(HalfTy, 0);
  1723. Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
  1724. } else {
  1725. assert(MI.getOpcode() == TargetOpcode::G_ASHR);
  1726. auto Hi = Builder.buildAShr(
  1727. HalfTy, Unmerge.getReg(1),
  1728. Builder.buildConstant(HalfTy, HalfSize - 1));
  1729. if (ShiftVal == HalfSize) {
  1730. // (G_ASHR i64:x, 32) ->
  1731. // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
  1732. Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
  1733. } else if (ShiftVal == Size - 1) {
  1734. // Don't need a second shift.
  1735. // (G_ASHR i64:x, 63) ->
  1736. // %narrowed = (G_ASHR hi_32(x), 31)
  1737. // G_MERGE_VALUES %narrowed, %narrowed
  1738. Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
  1739. } else {
  1740. auto Lo = Builder.buildAShr(
  1741. HalfTy, Unmerge.getReg(1),
  1742. Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
  1743. // (G_ASHR i64:x, C) ->, for C >= 32
  1744. // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
  1745. Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
  1746. }
  1747. }
  1748. MI.eraseFromParent();
  1749. }
  1750. bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
  1751. unsigned TargetShiftAmount) {
  1752. unsigned ShiftAmt;
  1753. if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
  1754. applyCombineShiftToUnmerge(MI, ShiftAmt);
  1755. return true;
  1756. }
  1757. return false;
  1758. }
  1759. bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
  1760. assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
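// Fold the round-trip cast G_INTTOPTR (G_PTRTOINT %ptr) to a copy of %ptr,
// provided %ptr has the same type as the G_INTTOPTR result.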
  1761. Register DstReg = MI.getOperand(0).getReg();
  1762. LLT DstTy = MRI.getType(DstReg);
  1763. Register SrcReg = MI.getOperand(1).getReg();
  1764. return mi_match(SrcReg, MRI,
  1765. m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
  1766. }
  1767. void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
  1768. assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
  1769. Register DstReg = MI.getOperand(0).getReg();
  1770. Builder.setInstr(MI);
  1771. Builder.buildCopy(DstReg, Reg);
  1772. MI.eraseFromParent();
  1773. }
  1774. void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
  1775. assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
  1776. Register DstReg = MI.getOperand(0).getReg();
  1777. Builder.setInstr(MI);
  1778. Builder.buildZExtOrTrunc(DstReg, Reg);
  1779. MI.eraseFromParent();
  1780. }
  1781. bool CombinerHelper::matchCombineAddP2IToPtrAdd(
  1782. MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
  1783. assert(MI.getOpcode() == TargetOpcode::G_ADD);
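// E.g. (illustrative, assuming a 64-bit pointer):
//   %i:_(s64) = G_PTRTOINT %ptr:_(p0)
//   %sum:_(s64) = G_ADD %i, %off
// ==>
//   %sum:_(s64) = G_PTRTOINT (G_PTR_ADD %ptr, %off)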
  1784. Register LHS = MI.getOperand(1).getReg();
  1785. Register RHS = MI.getOperand(2).getReg();
  1786. LLT IntTy = MRI.getType(LHS);
  1787. // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
  1788. // instruction.
  1789. PtrReg.second = false;
  1790. for (Register SrcReg : {LHS, RHS}) {
  1791. if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
  1792. // Don't handle cases where the integer is implicitly converted to the
  1793. // pointer width.
  1794. LLT PtrTy = MRI.getType(PtrReg.first);
  1795. if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
  1796. return true;
  1797. }
  1798. PtrReg.second = true;
  1799. }
  1800. return false;
  1801. }
  1802. void CombinerHelper::applyCombineAddP2IToPtrAdd(
  1803. MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
  1804. Register Dst = MI.getOperand(0).getReg();
  1805. Register LHS = MI.getOperand(1).getReg();
  1806. Register RHS = MI.getOperand(2).getReg();
  1807. const bool DoCommute = PtrReg.second;
  1808. if (DoCommute)
  1809. std::swap(LHS, RHS);
  1810. LHS = PtrReg.first;
  1811. LLT PtrTy = MRI.getType(LHS);
  1812. Builder.setInstrAndDebugLoc(MI);
  1813. auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
  1814. Builder.buildPtrToInt(Dst, PtrAdd);
  1815. MI.eraseFromParent();
  1816. }
  1817. bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
  1818. APInt &NewCst) {
  1819. auto &PtrAdd = cast<GPtrAdd>(MI);
  1820. Register LHS = PtrAdd.getBaseReg();
  1821. Register RHS = PtrAdd.getOffsetReg();
  1822. MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
  1823. if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
  1824. APInt Cst;
  1825. if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
  1826. auto DstTy = MRI.getType(PtrAdd.getReg(0));
  1827. // G_INTTOPTR uses zero-extension
  1828. NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
  1829. NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
  1830. return true;
  1831. }
  1832. }
  1833. return false;
  1834. }
  1835. void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
  1836. APInt &NewCst) {
  1837. auto &PtrAdd = cast<GPtrAdd>(MI);
  1838. Register Dst = PtrAdd.getReg(0);
  1839. Builder.setInstrAndDebugLoc(MI);
  1840. Builder.buildConstant(Dst, NewCst);
  1841. PtrAdd.eraseFromParent();
  1842. }
  1843. bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
  1844. assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
  1845. Register DstReg = MI.getOperand(0).getReg();
  1846. Register SrcReg = MI.getOperand(1).getReg();
  1847. LLT DstTy = MRI.getType(DstReg);
  1848. return mi_match(SrcReg, MRI,
  1849. m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
  1850. }
  1851. bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
  1852. assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
  1853. Register DstReg = MI.getOperand(0).getReg();
  1854. Register SrcReg = MI.getOperand(1).getReg();
  1855. LLT DstTy = MRI.getType(DstReg);
  1856. if (mi_match(SrcReg, MRI,
  1857. m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
  1858. unsigned DstSize = DstTy.getScalarSizeInBits();
  1859. unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
  1860. return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
  1861. }
  1862. return false;
  1863. }
  1864. bool CombinerHelper::matchCombineExtOfExt(
  1865. MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  1866. assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
  1867. MI.getOpcode() == TargetOpcode::G_SEXT ||
  1868. MI.getOpcode() == TargetOpcode::G_ZEXT) &&
  1869. "Expected a G_[ASZ]EXT");
  1870. Register SrcReg = MI.getOperand(1).getReg();
  1871. MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
  1872. // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
  1873. unsigned Opc = MI.getOpcode();
  1874. unsigned SrcOpc = SrcMI->getOpcode();
  1875. if (Opc == SrcOpc ||
  1876. (Opc == TargetOpcode::G_ANYEXT &&
  1877. (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
  1878. (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
  1879. MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
  1880. return true;
  1881. }
  1882. return false;
  1883. }
  1884. void CombinerHelper::applyCombineExtOfExt(
  1885. MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  1886. assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
  1887. MI.getOpcode() == TargetOpcode::G_SEXT ||
  1888. MI.getOpcode() == TargetOpcode::G_ZEXT) &&
  1889. "Expected a G_[ASZ]EXT");
  1890. Register Reg = std::get<0>(MatchInfo);
  1891. unsigned SrcExtOp = std::get<1>(MatchInfo);
  1892. // Combine exts with the same opcode.
  1893. if (MI.getOpcode() == SrcExtOp) {
  1894. Observer.changingInstr(MI);
  1895. MI.getOperand(1).setReg(Reg);
  1896. Observer.changedInstr(MI);
  1897. return;
  1898. }
  1899. // Combine:
  1900. // - anyext([sz]ext x) to [sz]ext x
  1901. // - sext(zext x) to zext x
  1902. if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
  1903. (MI.getOpcode() == TargetOpcode::G_SEXT &&
  1904. SrcExtOp == TargetOpcode::G_ZEXT)) {
  1905. Register DstReg = MI.getOperand(0).getReg();
  1906. Builder.setInstrAndDebugLoc(MI);
  1907. Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
  1908. MI.eraseFromParent();
  1909. }
  1910. }
  1911. void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
  1912. assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
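// x * -1 == 0 - x, so rewrite the multiply as a subtraction from zero.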
  1913. Register DstReg = MI.getOperand(0).getReg();
  1914. Register SrcReg = MI.getOperand(1).getReg();
  1915. LLT DstTy = MRI.getType(DstReg);
  1916. Builder.setInstrAndDebugLoc(MI);
  1917. Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
  1918. MI.getFlags());
  1919. MI.eraseFromParent();
  1920. }
  1921. bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
  1922. BuildFnTy &MatchInfo) {
  1923. assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
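// fabs(fneg(x)) == fabs(x), so retarget the G_FABS at the negated value's
// source and drop the G_FNEG.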
  1924. Register Src = MI.getOperand(1).getReg();
  1925. Register NegSrc;
  1926. if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc))))
  1927. return false;
  1928. MatchInfo = [=, &MI](MachineIRBuilder &B) {
  1929. Observer.changingInstr(MI);
  1930. MI.getOperand(1).setReg(NegSrc);
  1931. Observer.changedInstr(MI);
  1932. };
  1933. return true;
  1934. }
  1935. bool CombinerHelper::matchCombineTruncOfExt(
  1936. MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
  1937. assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  1938. Register SrcReg = MI.getOperand(1).getReg();
  1939. MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
  1940. unsigned SrcOpc = SrcMI->getOpcode();
  1941. if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
  1942. SrcOpc == TargetOpcode::G_ZEXT) {
  1943. MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
  1944. return true;
  1945. }
  1946. return false;
  1947. }
  1948. void CombinerHelper::applyCombineTruncOfExt(
  1949. MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
  1950. assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  1951. Register SrcReg = MatchInfo.first;
  1952. unsigned SrcExtOp = MatchInfo.second;
  1953. Register DstReg = MI.getOperand(0).getReg();
  1954. LLT SrcTy = MRI.getType(SrcReg);
  1955. LLT DstTy = MRI.getType(DstReg);
  1956. if (SrcTy == DstTy) {
  1957. MI.eraseFromParent();
  1958. replaceRegWith(MRI, DstReg, SrcReg);
  1959. return;
  1960. }
  1961. Builder.setInstrAndDebugLoc(MI);
  1962. if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
  1963. Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
  1964. else
  1965. Builder.buildTrunc(DstReg, SrcReg);
  1966. MI.eraseFromParent();
  1967. }
  1968. static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
  1969. const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
  1970. const unsigned TruncSize = TruncTy.getScalarSizeInBits();
  1971. // ShiftTy > 32 > TruncTy -> 32
  1972. if (ShiftSize > 32 && TruncSize < 32)
  1973. return ShiftTy.changeElementSize(32);
  1974. // TODO: We could also reduce to 16 bits, but that's more target-dependent.
  1975. // Some targets like it, some don't, some only like it under certain
  1976. // conditions/processor versions, etc.
  1977. // A TL hook might be needed for this.
  1978. // Don't combine
  1979. return ShiftTy;
  1980. }
  1981. bool CombinerHelper::matchCombineTruncOfShift(
  1982. MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
  1983. assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  1984. Register DstReg = MI.getOperand(0).getReg();
  1985. Register SrcReg = MI.getOperand(1).getReg();
  1986. if (!MRI.hasOneNonDBGUse(SrcReg))
  1987. return false;
  1988. LLT SrcTy = MRI.getType(SrcReg);
  1989. LLT DstTy = MRI.getType(DstReg);
  1990. MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
  1991. const auto &TL = getTargetLowering();
  1992. LLT NewShiftTy;
  1993. switch (SrcMI->getOpcode()) {
  1994. default:
  1995. return false;
  1996. case TargetOpcode::G_SHL: {
  1997. NewShiftTy = DstTy;
  1998. // Make sure new shift amount is legal.
  1999. KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
  2000. if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
  2001. return false;
  2002. break;
  2003. }
  2004. case TargetOpcode::G_LSHR:
  2005. case TargetOpcode::G_ASHR: {
  2006. // For right shifts, we conservatively do not do the transform if the TRUNC
  2007. // has any STORE users. The reason is that if we change the type of the
  2008. // shift, we may break the truncstore combine.
  2009. //
  2010. // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
  2011. for (auto &User : MRI.use_instructions(DstReg))
  2012. if (User.getOpcode() == TargetOpcode::G_STORE)
  2013. return false;
  2014. NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
  2015. if (NewShiftTy == SrcTy)
  2016. return false;
  2017. // Make sure we won't lose information by truncating the high bits.
  2018. KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
  2019. if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
  2020. DstTy.getScalarSizeInBits()))
  2021. return false;
  2022. break;
  2023. }
  2024. }
  2025. if (!isLegalOrBeforeLegalizer(
  2026. {SrcMI->getOpcode(),
  2027. {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
  2028. return false;
  2029. MatchInfo = std::make_pair(SrcMI, NewShiftTy);
  2030. return true;
  2031. }
  2032. void CombinerHelper::applyCombineTruncOfShift(
  2033. MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
  2034. Builder.setInstrAndDebugLoc(MI);
  2035. MachineInstr *ShiftMI = MatchInfo.first;
  2036. LLT NewShiftTy = MatchInfo.second;
  2037. Register Dst = MI.getOperand(0).getReg();
  2038. LLT DstTy = MRI.getType(Dst);
  2039. Register ShiftAmt = ShiftMI->getOperand(2).getReg();
  2040. Register ShiftSrc = ShiftMI->getOperand(1).getReg();
  2041. ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
  2042. Register NewShift =
  2043. Builder
  2044. .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
  2045. .getReg(0);
  2046. if (NewShiftTy == DstTy)
  2047. replaceRegWith(MRI, Dst, NewShift);
  2048. else
  2049. Builder.buildTrunc(Dst, NewShift);
  2050. eraseInst(MI);
  2051. }
  2052. bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
  2053. return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
  2054. return MO.isReg() &&
  2055. getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  2056. });
  2057. }
  2058. bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
  2059. return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
  2060. return !MO.isReg() ||
  2061. getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  2062. });
  2063. }
  2064. bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
  2065. assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  2066. ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  2067. return all_of(Mask, [](int Elt) { return Elt < 0; });
  2068. }
  2069. bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
  2070. assert(MI.getOpcode() == TargetOpcode::G_STORE);
  2071. return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
  2072. MRI);
  2073. }
  2074. bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
  2075. assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  2076. return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
  2077. MRI);
  2078. }
  2079. bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) {
  2080. assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
  2081. MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
  2082. "Expected an insert/extract element op");
  2083. LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
  2084. unsigned IdxIdx =
  2085. MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
  2086. auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
  2087. if (!Idx)
  2088. return false;
  2089. return Idx->getZExtValue() >= VecTy.getNumElements();
  2090. }
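// Match a G_SELECT whose condition is a constant (or a constant splat). OpIdx
// is set to the operand the select can be replaced with: the false operand (3)
// when the condition is zero, otherwise the true operand (2).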
  2091. bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
  2092. GSelect &SelMI = cast<GSelect>(MI);
  2093. auto Cst =
  2094. isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
  2095. if (!Cst)
  2096. return false;
  2097. OpIdx = Cst->isZero() ? 3 : 2;
  2098. return true;
  2099. }
bool CombinerHelper::eraseInst(MachineInstr &MI) {
  MI.eraseFromParent();
  return true;
}
  2104. bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
  2105. const MachineOperand &MOP2) {
  2106. if (!MOP1.isReg() || !MOP2.isReg())
  2107. return false;
  2108. auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
  2109. if (!InstAndDef1)
  2110. return false;
  2111. auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
  2112. if (!InstAndDef2)
  2113. return false;
  2114. MachineInstr *I1 = InstAndDef1->MI;
  2115. MachineInstr *I2 = InstAndDef2->MI;
  2116. // Handle a case like this:
  2117. //
  2118. // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
  2119. //
  2120. // Even though %0 and %1 are produced by the same instruction they are not
  2121. // the same values.
  2122. if (I1 == I2)
  2123. return MOP1.getReg() == MOP2.getReg();
  2124. // If we have an instruction which loads or stores, we can't guarantee that
  2125. // it is identical.
  2126. //
  2127. // For example, we may have
  2128. //
  2129. // %x1 = G_LOAD %addr (load N from @somewhere)
  2130. // ...
  2131. // call @foo
  2132. // ...
  2133. // %x2 = G_LOAD %addr (load N from @somewhere)
  2134. // ...
  2135. // %or = G_OR %x1, %x2
  2136. //
  2137. // It's possible that @foo will modify whatever lives at the address we're
  2138. // loading from. To be safe, let's just assume that all loads and stores
  2139. // are different (unless we have something which is guaranteed to not
  2140. // change.)
  2141. if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
  2142. return false;
  2143. // If both instructions are loads or stores, they are equal only if both
  2144. // are dereferenceable invariant loads with the same number of bits.
  2145. if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
  2146. GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
  2147. GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
  2148. if (!LS1 || !LS2)
  2149. return false;
  2150. if (!I2->isDereferenceableInvariantLoad() ||
  2151. (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
  2152. return false;
  2153. }
  2154. // Check for physical registers on the instructions first to avoid cases
  2155. // like this:
  2156. //
  2157. // %a = COPY $physreg
  2158. // ...
  2159. // SOMETHING implicit-def $physreg
  2160. // ...
  2161. // %b = COPY $physreg
  2162. //
  2163. // These copies are not equivalent.
  2164. if (any_of(I1->uses(), [](const MachineOperand &MO) {
  2165. return MO.isReg() && MO.getReg().isPhysical();
  2166. })) {
  2167. // Check if we have a case like this:
  2168. //
  2169. // %a = COPY $physreg
  2170. // %b = COPY %a
  2171. //
  2172. // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
  2173. // From that, we know that they must have the same value, since they must
  2174. // have come from the same COPY.
  2175. return I1->isIdenticalTo(*I2);
  2176. }
  2177. // We don't have any physical registers, so we don't necessarily need the
  2178. // same vreg defs.
  2179. //
  2180. // On the off-chance that there's some target instruction feeding into the
  2181. // instruction, let's use produceSameValue instead of isIdenticalTo.
  2182. if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
    // Handle instructions with multiple defs that produce the same values. The
    // values are the same for operands with the same index.
    // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
    // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
    // I1 and I2 are different instructions but produce the same values, so
    // %1 and %6 are the same, while %1 and %7 are not.
  2189. return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
  2190. I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
  2191. }
  2192. return false;
  2193. }
  2194. bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
  2195. if (!MOP.isReg())
  2196. return false;
  2197. auto *MI = MRI.getVRegDef(MOP.getReg());
  2198. auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
  2199. return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
  2200. MaybeCst->getSExtValue() == C;
  2201. }
  2202. bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
  2203. unsigned OpIdx) {
  2204. assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  2205. Register OldReg = MI.getOperand(0).getReg();
  2206. Register Replacement = MI.getOperand(OpIdx).getReg();
  2207. assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  2208. MI.eraseFromParent();
  2209. replaceRegWith(MRI, OldReg, Replacement);
  2210. return true;
  2211. }
  2212. bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
  2213. Register Replacement) {
  2214. assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
  2215. Register OldReg = MI.getOperand(0).getReg();
  2216. assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
  2217. MI.eraseFromParent();
  2218. replaceRegWith(MRI, OldReg, Replacement);
  2219. return true;
  2220. }
  2221. bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
  2222. assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  2223. // Match (cond ? x : x)
  2224. return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
  2225. canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
  2226. MRI);
  2227. }
  2228. bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
  2229. return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
  2230. canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
  2231. MRI);
  2232. }
  2233. bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
  2234. return matchConstantOp(MI.getOperand(OpIdx), 0) &&
  2235. canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
  2236. MRI);
  2237. }
  2238. bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
  2239. MachineOperand &MO = MI.getOperand(OpIdx);
  2240. return MO.isReg() &&
  2241. getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  2242. }
  2243. bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
  2244. unsigned OpIdx) {
  2245. MachineOperand &MO = MI.getOperand(OpIdx);
  2246. return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
  2247. }
  2248. bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
  2249. assert(MI.getNumDefs() == 1 && "Expected only one def?");
  2250. Builder.setInstr(MI);
  2251. Builder.buildFConstant(MI.getOperand(0), C);
  2252. MI.eraseFromParent();
  2253. return true;
  2254. }
  2255. bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
  2256. assert(MI.getNumDefs() == 1 && "Expected only one def?");
  2257. Builder.setInstr(MI);
  2258. Builder.buildConstant(MI.getOperand(0), C);
  2259. MI.eraseFromParent();
  2260. return true;
  2261. }
  2262. bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
  2263. assert(MI.getNumDefs() == 1 && "Expected only one def?");
  2264. Builder.setInstr(MI);
  2265. Builder.buildConstant(MI.getOperand(0), C);
  2266. MI.eraseFromParent();
  2267. return true;
  2268. }
  2269. bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
  2270. assert(MI.getNumDefs() == 1 && "Expected only one def?");
  2271. Builder.setInstr(MI);
  2272. Builder.buildUndef(MI.getOperand(0));
  2273. MI.eraseFromParent();
  2274. return true;
  2275. }
  2276. bool CombinerHelper::matchSimplifyAddToSub(
  2277. MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  2278. Register LHS = MI.getOperand(1).getReg();
  2279. Register RHS = MI.getOperand(2).getReg();
  2280. Register &NewLHS = std::get<0>(MatchInfo);
  2281. Register &NewRHS = std::get<1>(MatchInfo);
  2282. // Helper lambda to check for opportunities for
  2283. // ((0-A) + B) -> B - A
  2284. // (A + (0-B)) -> A - B
  2285. auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
  2286. if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
  2287. return false;
  2288. NewLHS = MaybeNewLHS;
  2289. return true;
  2290. };
  2291. return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
  2292. }
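// Match a chain of G_INSERT_VECTOR_ELT with in-range constant indices that
// bottoms out in a G_BUILD_VECTOR or G_IMPLICIT_DEF. MatchInfo collects one
// source register per lane so the apply step can emit a single G_BUILD_VECTOR
// (undef for any lane that was never written).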
  2293. bool CombinerHelper::matchCombineInsertVecElts(
  2294. MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
  2295. assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
  2296. "Invalid opcode");
  2297. Register DstReg = MI.getOperand(0).getReg();
  2298. LLT DstTy = MRI.getType(DstReg);
  2299. assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
  2300. unsigned NumElts = DstTy.getNumElements();
  2301. // If this MI is part of a sequence of insert_vec_elts, then
  2302. // don't do the combine in the middle of the sequence.
  2303. if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
  2304. TargetOpcode::G_INSERT_VECTOR_ELT)
  2305. return false;
  2306. MachineInstr *CurrInst = &MI;
  2307. MachineInstr *TmpInst;
  2308. int64_t IntImm;
  2309. Register TmpReg;
  2310. MatchInfo.resize(NumElts);
  2311. while (mi_match(
  2312. CurrInst->getOperand(0).getReg(), MRI,
  2313. m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
  2314. if (IntImm >= NumElts || IntImm < 0)
  2315. return false;
  2316. if (!MatchInfo[IntImm])
  2317. MatchInfo[IntImm] = TmpReg;
  2318. CurrInst = TmpInst;
  2319. }
  2320. // Variable index.
  2321. if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
  2322. return false;
  2323. if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
  2324. for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
  2325. if (!MatchInfo[I - 1].isValid())
  2326. MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
  2327. }
  2328. return true;
  2329. }
  2330. // If we didn't end in a G_IMPLICIT_DEF, bail out.
  2331. return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
  2332. }
  2333. void CombinerHelper::applyCombineInsertVecElts(
  2334. MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
  2335. Builder.setInstr(MI);
  2336. Register UndefReg;
  2337. auto GetUndef = [&]() {
  2338. if (UndefReg)
  2339. return UndefReg;
  2340. LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  2341. UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
  2342. return UndefReg;
  2343. };
  2344. for (unsigned I = 0; I < MatchInfo.size(); ++I) {
  2345. if (!MatchInfo[I])
  2346. MatchInfo[I] = GetUndef();
  2347. }
  2348. Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
  2349. MI.eraseFromParent();
  2350. }
  2351. void CombinerHelper::applySimplifyAddToSub(
  2352. MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
  2353. Builder.setInstr(MI);
  2354. Register SubLHS, SubRHS;
  2355. std::tie(SubLHS, SubRHS) = MatchInfo;
  2356. Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
  2357. MI.eraseFromParent();
  2358. }
  2359. bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
  2360. MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
  2361. // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
  2362. //
  2363. // Creates the new hand + logic instruction (but does not insert them.)
  2364. //
  2365. // On success, MatchInfo is populated with the new instructions. These are
  2366. // inserted in applyHoistLogicOpWithSameOpcodeHands.
  2367. unsigned LogicOpcode = MI.getOpcode();
  2368. assert(LogicOpcode == TargetOpcode::G_AND ||
  2369. LogicOpcode == TargetOpcode::G_OR ||
  2370. LogicOpcode == TargetOpcode::G_XOR);
  2371. MachineIRBuilder MIB(MI);
  2372. Register Dst = MI.getOperand(0).getReg();
  2373. Register LHSReg = MI.getOperand(1).getReg();
  2374. Register RHSReg = MI.getOperand(2).getReg();
  2375. // Don't recompute anything.
  2376. if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
  2377. return false;
  2378. // Make sure we have (hand x, ...), (hand y, ...)
  2379. MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
  2380. MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
  2381. if (!LeftHandInst || !RightHandInst)
  2382. return false;
  2383. unsigned HandOpcode = LeftHandInst->getOpcode();
  2384. if (HandOpcode != RightHandInst->getOpcode())
  2385. return false;
  2386. if (!LeftHandInst->getOperand(1).isReg() ||
  2387. !RightHandInst->getOperand(1).isReg())
  2388. return false;
  2389. // Make sure the types match up, and if we're doing this post-legalization,
  2390. // we end up with legal types.
  2391. Register X = LeftHandInst->getOperand(1).getReg();
  2392. Register Y = RightHandInst->getOperand(1).getReg();
  2393. LLT XTy = MRI.getType(X);
  2394. LLT YTy = MRI.getType(Y);
  2395. if (XTy != YTy)
  2396. return false;
  2397. if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
  2398. return false;
  2399. // Optional extra source register.
  2400. Register ExtraHandOpSrcReg;
  2401. switch (HandOpcode) {
  2402. default:
  2403. return false;
  2404. case TargetOpcode::G_ANYEXT:
  2405. case TargetOpcode::G_SEXT:
  2406. case TargetOpcode::G_ZEXT: {
  2407. // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
  2408. break;
  2409. }
  2410. case TargetOpcode::G_AND:
  2411. case TargetOpcode::G_ASHR:
  2412. case TargetOpcode::G_LSHR:
  2413. case TargetOpcode::G_SHL: {
  2414. // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
  2415. MachineOperand &ZOp = LeftHandInst->getOperand(2);
  2416. if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
  2417. return false;
  2418. ExtraHandOpSrcReg = ZOp.getReg();
  2419. break;
  2420. }
  2421. }
  2422. // Record the steps to build the new instructions.
  2423. //
  2424. // Steps to build (logic x, y)
  2425. auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
  2426. OperandBuildSteps LogicBuildSteps = {
  2427. [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
  2428. [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
  2429. [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
  2430. InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
  2431. // Steps to build hand (logic x, y), ...z
  2432. OperandBuildSteps HandBuildSteps = {
  2433. [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
  2434. [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
  2435. if (ExtraHandOpSrcReg.isValid())
  2436. HandBuildSteps.push_back(
  2437. [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
  2438. InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
  2439. MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
  2440. return true;
  2441. }
  2442. void CombinerHelper::applyBuildInstructionSteps(
  2443. MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
  2444. assert(MatchInfo.InstrsToBuild.size() &&
  2445. "Expected at least one instr to build?");
  2446. Builder.setInstr(MI);
  2447. for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
  2448. assert(InstrToBuild.Opcode && "Expected a valid opcode?");
  2449. assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
  2450. MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
  2451. for (auto &OperandFn : InstrToBuild.OperandFns)
  2452. OperandFn(Instr);
  2453. }
  2454. MI.eraseFromParent();
  2455. }
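// Fold (ashr (shl x, C), C) --> (sext_inreg x, width - C). For example
// (made-up widths): with %x:_(s32) and C = 24 this becomes
//   %a:_(s32) = G_SEXT_INREG %x, 8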
  2456. bool CombinerHelper::matchAshrShlToSextInreg(
  2457. MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
  2458. assert(MI.getOpcode() == TargetOpcode::G_ASHR);
  2459. int64_t ShlCst, AshrCst;
  2460. Register Src;
  2461. if (!mi_match(MI.getOperand(0).getReg(), MRI,
  2462. m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
  2463. m_ICstOrSplat(AshrCst))))
  2464. return false;
  2465. if (ShlCst != AshrCst)
  2466. return false;
  2467. if (!isLegalOrBeforeLegalizer(
  2468. {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
  2469. return false;
  2470. MatchInfo = std::make_tuple(Src, ShlCst);
  2471. return true;
  2472. }
  2473. void CombinerHelper::applyAshShlToSextInreg(
  2474. MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
  2475. assert(MI.getOpcode() == TargetOpcode::G_ASHR);
  2476. Register Src;
  2477. int64_t ShiftAmt;
  2478. std::tie(Src, ShiftAmt) = MatchInfo;
  2479. unsigned Size = MRI.getType(Src).getScalarSizeInBits();
  2480. Builder.setInstrAndDebugLoc(MI);
  2481. Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
  2482. MI.eraseFromParent();
  2483. }
  2484. /// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
  2485. bool CombinerHelper::matchOverlappingAnd(
  2486. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  2487. assert(MI.getOpcode() == TargetOpcode::G_AND);
  2488. Register Dst = MI.getOperand(0).getReg();
  2489. LLT Ty = MRI.getType(Dst);
  2490. Register R;
  2491. int64_t C1;
  2492. int64_t C2;
  2493. if (!mi_match(
  2494. Dst, MRI,
  2495. m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
  2496. return false;
  2497. MatchInfo = [=](MachineIRBuilder &B) {
  2498. if (C1 & C2) {
  2499. B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
  2500. return;
  2501. }
  2502. auto Zero = B.buildConstant(Ty, 0);
  2503. replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
  2504. };
  2505. return true;
  2506. }
  2507. bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
  2508. Register &Replacement) {
  2509. // Given
  2510. //
  2511. // %y:_(sN) = G_SOMETHING
  2512. // %x:_(sN) = G_SOMETHING
  2513. // %res:_(sN) = G_AND %x, %y
  2514. //
  2515. // Eliminate the G_AND when it is known that x & y == x or x & y == y.
  2516. //
  2517. // Patterns like this can appear as a result of legalization. E.g.
  2518. //
  2519. // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
  2520. // %one:_(s32) = G_CONSTANT i32 1
  2521. // %and:_(s32) = G_AND %cmp, %one
  2522. //
  2523. // In this case, G_ICMP only produces a single bit, so x & 1 == x.
  2524. assert(MI.getOpcode() == TargetOpcode::G_AND);
  2525. if (!KB)
  2526. return false;
  2527. Register AndDst = MI.getOperand(0).getReg();
  2528. Register LHS = MI.getOperand(1).getReg();
  2529. Register RHS = MI.getOperand(2).getReg();
  2530. KnownBits LHSBits = KB->getKnownBits(LHS);
  2531. KnownBits RHSBits = KB->getKnownBits(RHS);
  2532. // Check that x & Mask == x.
  2533. // x & 1 == x, always
  2534. // x & 0 == x, only if x is also 0
  2535. // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
  2536. //
  2537. // Check if we can replace AndDst with the LHS of the G_AND
  2538. if (canReplaceReg(AndDst, LHS, MRI) &&
  2539. (LHSBits.Zero | RHSBits.One).isAllOnes()) {
  2540. Replacement = LHS;
  2541. return true;
  2542. }
  2543. // Check if we can replace AndDst with the RHS of the G_AND
  2544. if (canReplaceReg(AndDst, RHS, MRI) &&
  2545. (LHSBits.One | RHSBits.Zero).isAllOnes()) {
  2546. Replacement = RHS;
  2547. return true;
  2548. }
  2549. return false;
  2550. }
  2551. bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
  2552. // Given
  2553. //
  2554. // %y:_(sN) = G_SOMETHING
  2555. // %x:_(sN) = G_SOMETHING
  2556. // %res:_(sN) = G_OR %x, %y
  2557. //
  2558. // Eliminate the G_OR when it is known that x | y == x or x | y == y.
  2559. assert(MI.getOpcode() == TargetOpcode::G_OR);
  2560. if (!KB)
  2561. return false;
  2562. Register OrDst = MI.getOperand(0).getReg();
  2563. Register LHS = MI.getOperand(1).getReg();
  2564. Register RHS = MI.getOperand(2).getReg();
  2565. KnownBits LHSBits = KB->getKnownBits(LHS);
  2566. KnownBits RHSBits = KB->getKnownBits(RHS);
  2567. // Check that x | Mask == x.
  2568. // x | 0 == x, always
  2569. // x | 1 == x, only if x is also 1
  2570. // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
  2571. //
  2572. // Check if we can replace OrDst with the LHS of the G_OR
  2573. if (canReplaceReg(OrDst, LHS, MRI) &&
  2574. (LHSBits.One | RHSBits.Zero).isAllOnes()) {
  2575. Replacement = LHS;
  2576. return true;
  2577. }
  2578. // Check if we can replace OrDst with the RHS of the G_OR
  2579. if (canReplaceReg(OrDst, RHS, MRI) &&
  2580. (LHSBits.Zero | RHSBits.One).isAllOnes()) {
  2581. Replacement = RHS;
  2582. return true;
  2583. }
  2584. return false;
  2585. }
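// A G_SEXT_INREG is redundant when known-bits analysis already guarantees at
// least that many sign bits. Sketch: if %x:_(s32) is known to have 25 or more
// sign bits, then
//   %y:_(s32) = G_SEXT_INREG %x, 8
// can be replaced by %x itself.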
  2586. bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
  2587. // If the input is already sign extended, just drop the extension.
  2588. Register Src = MI.getOperand(1).getReg();
  2589. unsigned ExtBits = MI.getOperand(2).getImm();
  2590. unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
  2591. return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
  2592. }
  2593. static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
  2594. int64_t Cst, bool IsVector, bool IsFP) {
  2595. // For i1, Cst will always be -1 regardless of boolean contents.
  2596. return (ScalarSizeBits == 1 && Cst == -1) ||
  2597. isConstTrueVal(TLI, Cst, IsVector, IsFP);
  2598. }
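// Match a logical negation of a compare, or of a whole tree of compares joined
// by G_AND/G_OR, expressed as an xor with "true". Sketch:
//   %c:_(s1) = G_ICMP intpred(eq), %a, %b
//   %t:_(s1) = G_CONSTANT i1 true
//   %n:_(s1) = G_XOR %c, %t
// applyNotCmp then inverts the predicates (and swaps AND/OR per De Morgan)
// instead of emitting the xor.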
  2599. bool CombinerHelper::matchNotCmp(MachineInstr &MI,
  2600. SmallVectorImpl<Register> &RegsToNegate) {
  2601. assert(MI.getOpcode() == TargetOpcode::G_XOR);
  2602. LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  2603. const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
  2604. Register XorSrc;
  2605. Register CstReg;
  2606. // We match xor(src, true) here.
  2607. if (!mi_match(MI.getOperand(0).getReg(), MRI,
  2608. m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
  2609. return false;
  2610. if (!MRI.hasOneNonDBGUse(XorSrc))
  2611. return false;
  // Check that XorSrc is the root of a tree of comparisons combined with ANDs
  // and ORs. The suffix of RegsToNegate starting from index I is used as a
  // work list of tree nodes to visit.
  2615. RegsToNegate.push_back(XorSrc);
  2616. // Remember whether the comparisons are all integer or all floating point.
  2617. bool IsInt = false;
  2618. bool IsFP = false;
  2619. for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
  2620. Register Reg = RegsToNegate[I];
  2621. if (!MRI.hasOneNonDBGUse(Reg))
  2622. return false;
  2623. MachineInstr *Def = MRI.getVRegDef(Reg);
  2624. switch (Def->getOpcode()) {
  2625. default:
  2626. // Don't match if the tree contains anything other than ANDs, ORs and
  2627. // comparisons.
  2628. return false;
  2629. case TargetOpcode::G_ICMP:
  2630. if (IsFP)
  2631. return false;
  2632. IsInt = true;
  2633. // When we apply the combine we will invert the predicate.
  2634. break;
  2635. case TargetOpcode::G_FCMP:
  2636. if (IsInt)
  2637. return false;
  2638. IsFP = true;
  2639. // When we apply the combine we will invert the predicate.
  2640. break;
  2641. case TargetOpcode::G_AND:
  2642. case TargetOpcode::G_OR:
  2643. // Implement De Morgan's laws:
  2644. // ~(x & y) -> ~x | ~y
  2645. // ~(x | y) -> ~x & ~y
  2646. // When we apply the combine we will change the opcode and recursively
  2647. // negate the operands.
  2648. RegsToNegate.push_back(Def->getOperand(1).getReg());
  2649. RegsToNegate.push_back(Def->getOperand(2).getReg());
  2650. break;
  2651. }
  2652. }
  2653. // Now we know whether the comparisons are integer or floating point, check
  2654. // the constant in the xor.
  2655. int64_t Cst;
  2656. if (Ty.isVector()) {
  2657. MachineInstr *CstDef = MRI.getVRegDef(CstReg);
  2658. auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
  2659. if (!MaybeCst)
  2660. return false;
  2661. if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
  2662. return false;
  2663. } else {
  2664. if (!mi_match(CstReg, MRI, m_ICst(Cst)))
  2665. return false;
  2666. if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
  2667. return false;
  2668. }
  2669. return true;
  2670. }
  2671. void CombinerHelper::applyNotCmp(MachineInstr &MI,
  2672. SmallVectorImpl<Register> &RegsToNegate) {
  2673. for (Register Reg : RegsToNegate) {
  2674. MachineInstr *Def = MRI.getVRegDef(Reg);
  2675. Observer.changingInstr(*Def);
  2676. // For each comparison, invert the opcode. For each AND and OR, change the
  2677. // opcode.
  2678. switch (Def->getOpcode()) {
  2679. default:
  2680. llvm_unreachable("Unexpected opcode");
  2681. case TargetOpcode::G_ICMP:
  2682. case TargetOpcode::G_FCMP: {
  2683. MachineOperand &PredOp = Def->getOperand(1);
  2684. CmpInst::Predicate NewP = CmpInst::getInversePredicate(
  2685. (CmpInst::Predicate)PredOp.getPredicate());
  2686. PredOp.setPredicate(NewP);
  2687. break;
  2688. }
  2689. case TargetOpcode::G_AND:
  2690. Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
  2691. break;
  2692. case TargetOpcode::G_OR:
  2693. Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
  2694. break;
  2695. }
  2696. Observer.changedInstr(*Def);
  2697. }
  2698. replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  2699. MI.eraseFromParent();
  2700. }
  2701. bool CombinerHelper::matchXorOfAndWithSameReg(
  2702. MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  2703. // Match (xor (and x, y), y) (or any of its commuted cases)
  2704. assert(MI.getOpcode() == TargetOpcode::G_XOR);
  2705. Register &X = MatchInfo.first;
  2706. Register &Y = MatchInfo.second;
  2707. Register AndReg = MI.getOperand(1).getReg();
  2708. Register SharedReg = MI.getOperand(2).getReg();
  2709. // Find a G_AND on either side of the G_XOR.
  2710. // Look for one of
  2711. //
  2712. // (xor (and x, y), SharedReg)
  2713. // (xor SharedReg, (and x, y))
  2714. if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
  2715. std::swap(AndReg, SharedReg);
  2716. if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
  2717. return false;
  2718. }
  2719. // Only do this if we'll eliminate the G_AND.
  2720. if (!MRI.hasOneNonDBGUse(AndReg))
  2721. return false;
  2722. // We can combine if SharedReg is the same as either the LHS or RHS of the
  2723. // G_AND.
  2724. if (Y != SharedReg)
  2725. std::swap(X, Y);
  2726. return Y == SharedReg;
  2727. }
  2728. void CombinerHelper::applyXorOfAndWithSameReg(
  2729. MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  2730. // Fold (xor (and x, y), y) -> (and (not x), y)
  2731. Builder.setInstrAndDebugLoc(MI);
  2732. Register X, Y;
  2733. std::tie(X, Y) = MatchInfo;
  2734. auto Not = Builder.buildNot(MRI.getType(X), X);
  2735. Observer.changingInstr(MI);
  2736. MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
  2737. MI.getOperand(1).setReg(Not->getOperand(0).getReg());
  2738. MI.getOperand(2).setReg(Y);
  2739. Observer.changedInstr(MI);
  2740. }
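// Match a G_PTR_ADD whose base is a null pointer (or a vector of nulls), which
// makes the result just an integer-to-pointer cast of the offset. Skipped for
// non-integral address spaces, where that equivalence does not hold.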
  2741. bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
  2742. auto &PtrAdd = cast<GPtrAdd>(MI);
  2743. Register DstReg = PtrAdd.getReg(0);
  2744. LLT Ty = MRI.getType(DstReg);
  2745. const DataLayout &DL = Builder.getMF().getDataLayout();
  2746. if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
  2747. return false;
  2748. if (Ty.isPointer()) {
  2749. auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
  2750. return ConstVal && *ConstVal == 0;
  2751. }
  2752. assert(Ty.isVector() && "Expecting a vector type");
  2753. const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
  2754. return isBuildVectorAllZeros(*VecMI, MRI);
  2755. }
  2756. void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
  2757. auto &PtrAdd = cast<GPtrAdd>(MI);
  2758. Builder.setInstrAndDebugLoc(PtrAdd);
  2759. Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
  2760. PtrAdd.eraseFromParent();
  2761. }
  2762. /// The second source operand is known to be a power of 2.
  2763. void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
  2764. Register DstReg = MI.getOperand(0).getReg();
  2765. Register Src0 = MI.getOperand(1).getReg();
  2766. Register Pow2Src1 = MI.getOperand(2).getReg();
  2767. LLT Ty = MRI.getType(DstReg);
  2768. Builder.setInstrAndDebugLoc(MI);
  2769. // Fold (urem x, pow2) -> (and x, pow2-1)
  2770. auto NegOne = Builder.buildConstant(Ty, -1);
  2771. auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
  2772. Builder.buildAnd(DstReg, Src0, Add);
  2773. MI.eraseFromParent();
  2774. }
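// Match a binary operator where one operand is a single-use G_SELECT of two
// constants and the other operand is also foldable: either a constant itself,
// or, for G_AND/G_OR, any value when both select arms are all-zeros or
// all-ones splats (those cases fold trivially).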
  2775. bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
  2776. unsigned &SelectOpNo) {
  2777. Register LHS = MI.getOperand(1).getReg();
  2778. Register RHS = MI.getOperand(2).getReg();
  2779. Register OtherOperandReg = RHS;
  2780. SelectOpNo = 1;
  2781. MachineInstr *Select = MRI.getVRegDef(LHS);
  2782. // Don't do this unless the old select is going away. We want to eliminate the
  2783. // binary operator, not replace a binop with a select.
  2784. if (Select->getOpcode() != TargetOpcode::G_SELECT ||
  2785. !MRI.hasOneNonDBGUse(LHS)) {
  2786. OtherOperandReg = LHS;
  2787. SelectOpNo = 2;
  2788. Select = MRI.getVRegDef(RHS);
  2789. if (Select->getOpcode() != TargetOpcode::G_SELECT ||
  2790. !MRI.hasOneNonDBGUse(RHS))
  2791. return false;
  2792. }
  2793. MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
  2794. MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
  2795. if (!isConstantOrConstantVector(*SelectLHS, MRI,
  2796. /*AllowFP*/ true,
  2797. /*AllowOpaqueConstants*/ false))
  2798. return false;
  2799. if (!isConstantOrConstantVector(*SelectRHS, MRI,
  2800. /*AllowFP*/ true,
  2801. /*AllowOpaqueConstants*/ false))
  2802. return false;
  unsigned BinOpcode = MI.getOpcode();
  // We now know one of the operands is a select of constants. Now verify that
  // the other binary operator operand is either a constant, or we can handle a
  // variable.
  2807. bool CanFoldNonConst =
  2808. (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
  2809. (isNullOrNullSplat(*SelectLHS, MRI) ||
  2810. isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
  2811. (isNullOrNullSplat(*SelectRHS, MRI) ||
  2812. isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
  2813. if (CanFoldNonConst)
  2814. return true;
  2815. return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
  2816. /*AllowFP*/ true,
  2817. /*AllowOpaqueConstants*/ false);
  2818. }
  2819. /// \p SelectOperand is the operand in binary operator \p MI that is the select
  2820. /// to fold.
  2821. bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
  2822. const unsigned &SelectOperand) {
  2823. Builder.setInstrAndDebugLoc(MI);
  2824. Register Dst = MI.getOperand(0).getReg();
  2825. Register LHS = MI.getOperand(1).getReg();
  2826. Register RHS = MI.getOperand(2).getReg();
  2827. MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
  2828. Register SelectCond = Select->getOperand(1).getReg();
  2829. Register SelectTrue = Select->getOperand(2).getReg();
  2830. Register SelectFalse = Select->getOperand(3).getReg();
  2831. LLT Ty = MRI.getType(Dst);
  2832. unsigned BinOpcode = MI.getOpcode();
  2833. Register FoldTrue, FoldFalse;
  2834. // We have a select-of-constants followed by a binary operator with a
  2835. // constant. Eliminate the binop by pulling the constant math into the select.
  2836. // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  2837. if (SelectOperand == 1) {
  2838. // TODO: SelectionDAG verifies this actually constant folds before
  2839. // committing to the combine.
  2840. FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
  2841. FoldFalse =
  2842. Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
  2843. } else {
  2844. FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
  2845. FoldFalse =
  2846. Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
  2847. }
  2848. Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
  2849. MI.eraseFromParent();
  2850. return true;
  2851. }
  2852. std::optional<SmallVector<Register, 8>>
  2853. CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
  2854. assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
  2855. // We want to detect if Root is part of a tree which represents a bunch
  2856. // of loads being merged into a larger load. We'll try to recognize patterns
  2857. // like, for example:
  2858. //
  2859. // Reg Reg
  2860. // \ /
  2861. // OR_1 Reg
  2862. // \ /
  2863. // OR_2
  2864. // \ Reg
  2865. // .. /
  2866. // Root
  2867. //
  2868. // Reg Reg Reg Reg
  2869. // \ / \ /
  2870. // OR_1 OR_2
  2871. // \ /
  2872. // \ /
  2873. // ...
  2874. // Root
  2875. //
  2876. // Each "Reg" may have been produced by a load + some arithmetic. This
  2877. // function will save each of them.
  2878. SmallVector<Register, 8> RegsToVisit;
  2879. SmallVector<const MachineInstr *, 7> Ors = {Root};
  2880. // In the "worst" case, we're dealing with a load for each byte. So, there
  2881. // are at most #bytes - 1 ORs.
  2882. const unsigned MaxIter =
  2883. MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
  2884. for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
  2885. if (Ors.empty())
  2886. break;
  2887. const MachineInstr *Curr = Ors.pop_back_val();
  2888. Register OrLHS = Curr->getOperand(1).getReg();
  2889. Register OrRHS = Curr->getOperand(2).getReg();
    // In the combine, we want to eliminate the entire tree.
  2891. if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
  2892. return std::nullopt;
  2893. // If it's a G_OR, save it and continue to walk. If it's not, then it's
  2894. // something that may be a load + arithmetic.
  2895. if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
  2896. Ors.push_back(Or);
  2897. else
  2898. RegsToVisit.push_back(OrLHS);
  2899. if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
  2900. Ors.push_back(Or);
  2901. else
  2902. RegsToVisit.push_back(OrRHS);
  2903. }
  2904. // We're going to try and merge each register into a wider power-of-2 type,
  2905. // so we ought to have an even number of registers.
  2906. if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
  2907. return std::nullopt;
  2908. return RegsToVisit;
  2909. }
  2910. /// Helper function for findLoadOffsetsForLoadOrCombine.
  2911. ///
  2912. /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
  2913. /// and then moving that value into a specific byte offset.
  2914. ///
  2915. /// e.g. x[i] << 24
  2916. ///
  2917. /// \returns The load instruction and the byte offset it is moved into.
  2918. static std::optional<std::pair<GZExtLoad *, int64_t>>
  2919. matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
  2920. const MachineRegisterInfo &MRI) {
  2921. assert(MRI.hasOneNonDBGUse(Reg) &&
  2922. "Expected Reg to only have one non-debug use?");
  2923. Register MaybeLoad;
  2924. int64_t Shift;
  2925. if (!mi_match(Reg, MRI,
  2926. m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
  2927. Shift = 0;
  2928. MaybeLoad = Reg;
  2929. }
  2930. if (Shift % MemSizeInBits != 0)
  2931. return std::nullopt;
  2932. // TODO: Handle other types of loads.
  2933. auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
  2934. if (!Load)
  2935. return std::nullopt;
  2936. if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
  2937. return std::nullopt;
  2938. return std::make_pair(Load, Shift / MemSizeInBits);
  2939. }
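/// For each register feeding the G_OR tree, find the narrow load producing it
/// and the byte position its value occupies in the combined result. Returns
/// the load at the lowest address, that lowest index, and the latest load in
/// program order, or std::nullopt if the pattern does not hold (loads in
/// different blocks, mismatched base pointers, repeated offsets, ...).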
  2940. std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
  2941. CombinerHelper::findLoadOffsetsForLoadOrCombine(
  2942. SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
  2943. const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
  2944. // Each load found for the pattern. There should be one for each RegsToVisit.
  2945. SmallSetVector<const MachineInstr *, 8> Loads;
  2946. // The lowest index used in any load. (The lowest "i" for each x[i].)
  2947. int64_t LowestIdx = INT64_MAX;
  2948. // The load which uses the lowest index.
  2949. GZExtLoad *LowestIdxLoad = nullptr;
  2950. // Keeps track of the load indices we see. We shouldn't see any indices twice.
  2951. SmallSet<int64_t, 8> SeenIdx;
  2952. // Ensure each load is in the same MBB.
  2953. // TODO: Support multiple MachineBasicBlocks.
  2954. MachineBasicBlock *MBB = nullptr;
  2955. const MachineMemOperand *MMO = nullptr;
  2956. // Earliest instruction-order load in the pattern.
  2957. GZExtLoad *EarliestLoad = nullptr;
  2958. // Latest instruction-order load in the pattern.
  2959. GZExtLoad *LatestLoad = nullptr;
  2960. // Base pointer which every load should share.
  2961. Register BasePtr;
  2962. // We want to find a load for each register. Each load should have some
  2963. // appropriate bit twiddling arithmetic. During this loop, we will also keep
  2964. // track of the load which uses the lowest index. Later, we will check if we
  2965. // can use its pointer in the final, combined load.
  2966. for (auto Reg : RegsToVisit) {
  2967. // Find the load, and find the position that it will end up in (e.g. a
  2968. // shifted) value.
  2969. auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
  2970. if (!LoadAndPos)
  2971. return std::nullopt;
  2972. GZExtLoad *Load;
  2973. int64_t DstPos;
  2974. std::tie(Load, DstPos) = *LoadAndPos;
  2975. // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
  2976. // it is difficult to check for stores/calls/etc between loads.
  2977. MachineBasicBlock *LoadMBB = Load->getParent();
  2978. if (!MBB)
  2979. MBB = LoadMBB;
  2980. if (LoadMBB != MBB)
  2981. return std::nullopt;
  2982. // Make sure that the MachineMemOperands of every seen load are compatible.
  2983. auto &LoadMMO = Load->getMMO();
  2984. if (!MMO)
  2985. MMO = &LoadMMO;
  2986. if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
  2987. return std::nullopt;
  2988. // Find out what the base pointer and index for the load is.
  2989. Register LoadPtr;
  2990. int64_t Idx;
  2991. if (!mi_match(Load->getOperand(1).getReg(), MRI,
  2992. m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
  2993. LoadPtr = Load->getOperand(1).getReg();
  2994. Idx = 0;
  2995. }
  2996. // Don't combine things like a[i], a[i] -> a bigger load.
  2997. if (!SeenIdx.insert(Idx).second)
  2998. return std::nullopt;
  2999. // Every load must share the same base pointer; don't combine things like:
  3000. //
  3001. // a[i], b[i + 1] -> a bigger load.
  3002. if (!BasePtr.isValid())
  3003. BasePtr = LoadPtr;
  3004. if (BasePtr != LoadPtr)
  3005. return std::nullopt;
  3006. if (Idx < LowestIdx) {
  3007. LowestIdx = Idx;
  3008. LowestIdxLoad = Load;
  3009. }
  3010. // Keep track of the byte offset that this load ends up at. If we have seen
  3011. // the byte offset, then stop here. We do not want to combine:
  3012. //
  3013. // a[i] << 16, a[i + k] << 16 -> a bigger load.
  3014. if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
  3015. return std::nullopt;
  3016. Loads.insert(Load);
  3017. // Keep track of the position of the earliest/latest loads in the pattern.
  3018. // We will check that there are no load fold barriers between them later
  3019. // on.
  3020. //
  3021. // FIXME: Is there a better way to check for load fold barriers?
  3022. if (!EarliestLoad || dominates(*Load, *EarliestLoad))
  3023. EarliestLoad = Load;
  3024. if (!LatestLoad || dominates(*LatestLoad, *Load))
  3025. LatestLoad = Load;
  3026. }
  3027. // We found a load for each register. Let's check if each load satisfies the
  3028. // pattern.
  3029. assert(Loads.size() == RegsToVisit.size() &&
  3030. "Expected to find a load for each register?");
  3031. assert(EarliestLoad != LatestLoad && EarliestLoad &&
  3032. LatestLoad && "Expected at least two loads?");
  3033. // Check if there are any stores, calls, etc. between any of the loads. If
  3034. // there are, then we can't safely perform the combine.
  3035. //
  3036. // MaxIter is chosen based off the (worst case) number of iterations it
  3037. // typically takes to succeed in the LLVM test suite plus some padding.
  3038. //
  3039. // FIXME: Is there a better way to check for load fold barriers?
  3040. const unsigned MaxIter = 20;
  3041. unsigned Iter = 0;
  3042. for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
  3043. LatestLoad->getIterator())) {
  3044. if (Loads.count(&MI))
  3045. continue;
  3046. if (MI.isLoadFoldBarrier())
  3047. return std::nullopt;
  3048. if (Iter++ == MaxIter)
  3049. return std::nullopt;
  3050. }
  3051. return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
  3052. }
  3053. bool CombinerHelper::matchLoadOrCombine(
  3054. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  3055. assert(MI.getOpcode() == TargetOpcode::G_OR);
  3056. MachineFunction &MF = *MI.getMF();
  3057. // Assuming a little-endian target, transform:
  3058. // s8 *a = ...
  3059. // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
  3060. // =>
  3061. // s32 val = *((i32)a)
  3062. //
  3063. // s8 *a = ...
  3064. // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
  3065. // =>
  3066. // s32 val = BSWAP(*((s32)a))
  3067. Register Dst = MI.getOperand(0).getReg();
  3068. LLT Ty = MRI.getType(Dst);
  3069. if (Ty.isVector())
  3070. return false;
  3071. // We need to combine at least two loads into this type. Since the smallest
  3072. // possible load is into a byte, we need at least a 16-bit wide type.
  3073. const unsigned WideMemSizeInBits = Ty.getSizeInBits();
  3074. if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
  3075. return false;
  3076. // Match a collection of non-OR instructions in the pattern.
  3077. auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
  3078. if (!RegsToVisit)
  3079. return false;
  3080. // We have a collection of non-OR instructions. Figure out how wide each of
  3081. // the small loads should be based off of the number of potential loads we
  3082. // found.
  3083. const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
  3084. if (NarrowMemSizeInBits % 8 != 0)
  3085. return false;
  3086. // Check if each register feeding into each OR is a load from the same
  3087. // base pointer + some arithmetic.
  3088. //
  3089. // e.g. a[0], a[1] << 8, a[2] << 16, etc.
  3090. //
  3091. // Also verify that each of these ends up putting a[i] into the same memory
  3092. // offset as a load into a wide type would.
  3093. SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
  3094. GZExtLoad *LowestIdxLoad, *LatestLoad;
  3095. int64_t LowestIdx;
  3096. auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
  3097. MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
  3098. if (!MaybeLoadInfo)
  3099. return false;
  3100. std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
  3101. // We have a bunch of loads being OR'd together. Using the addresses + offsets
  3102. // we found before, check if this corresponds to a big or little endian byte
  3103. // pattern. If it does, then we can represent it using a load + possibly a
  3104. // BSWAP.
  3105. bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
  3106. std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
  3107. if (!IsBigEndian)
  3108. return false;
  3109. bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
  3110. if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
  3111. return false;
  3112. // Make sure that the load from the lowest index produces offset 0 in the
  3113. // final value.
  3114. //
  3115. // This ensures that we won't combine something like this:
  3116. //
  3117. // load x[i] -> byte 2
  3118. // load x[i+1] -> byte 0 ---> wide_load x[i]
  3119. // load x[i+2] -> byte 1
  3120. const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
  3121. const unsigned ZeroByteOffset =
  3122. *IsBigEndian
  3123. ? bigEndianByteAt(NumLoadsInTy, 0)
  3124. : littleEndianByteAt(NumLoadsInTy, 0);
  3125. auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
  3126. if (ZeroOffsetIdx == MemOffset2Idx.end() ||
  3127. ZeroOffsetIdx->second != LowestIdx)
  3128. return false;
  // We will reuse the pointer from the load which ends up at byte offset 0. It
  // may not use index 0.
  3131. Register Ptr = LowestIdxLoad->getPointerReg();
  3132. const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
  3133. LegalityQuery::MemDesc MMDesc(MMO);
  3134. MMDesc.MemoryTy = Ty;
  3135. if (!isLegalOrBeforeLegalizer(
  3136. {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
  3137. return false;
  3138. auto PtrInfo = MMO.getPointerInfo();
  3139. auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
  3140. // Load must be allowed and fast on the target.
  3141. LLVMContext &C = MF.getFunction().getContext();
  3142. auto &DL = MF.getDataLayout();
  3143. unsigned Fast = 0;
  3144. if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
  3145. !Fast)
  3146. return false;
  3147. MatchInfo = [=](MachineIRBuilder &MIB) {
  3148. MIB.setInstrAndDebugLoc(*LatestLoad);
  3149. Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
  3150. MIB.buildLoad(LoadDst, Ptr, *NewMMO);
  3151. if (NeedsBSwap)
  3152. MIB.buildBSwap(Dst, LoadDst);
  3153. };
  3154. return true;
  3155. }
  3156. /// Check if the store \p Store is a truncstore that can be merged. That is,
  3157. /// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
  3158. /// Register then it does not need to match and SrcVal is set to the source
  3159. /// value found.
  3160. /// On match, returns the start byte offset of the \p SrcVal that is being
  3161. /// stored.
  3162. static std::optional<int64_t>
  3163. getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
  3164. MachineRegisterInfo &MRI) {
  3165. Register TruncVal;
  3166. if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
  3167. return std::nullopt;
  3168. // The shift amount must be a constant multiple of the narrow type.
  3169. // It is translated to the offset address in the wide source value "y".
  3170. //
  3171. // x = G_LSHR y, ShiftAmtC
  3172. // s8 z = G_TRUNC x
  3173. // store z, ...
  3174. Register FoundSrcVal;
  3175. int64_t ShiftAmt;
  3176. if (!mi_match(TruncVal, MRI,
  3177. m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
  3178. m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
  3179. if (!SrcVal.isValid() || TruncVal == SrcVal) {
  3180. if (!SrcVal.isValid())
  3181. SrcVal = TruncVal;
  3182. return 0; // If it's the lowest index store.
  3183. }
  3184. return std::nullopt;
  3185. }
  3186. unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
  if (ShiftAmt % NarrowBits != 0)
  3188. return std::nullopt;
  3189. const unsigned Offset = ShiftAmt / NarrowBits;
  3190. if (SrcVal.isValid() && FoundSrcVal != SrcVal)
  3191. return std::nullopt;
  3192. if (!SrcVal.isValid())
  3193. SrcVal = FoundSrcVal;
  3194. else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
  3195. return std::nullopt;
  3196. return Offset;
  3197. }
/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
  3201. ///
  3202. /// Assuming little endian target:
  3203. /// i8 *p = ...
  3204. /// i32 val = ...
  3205. /// p[0] = (val >> 0) & 0xFF;
  3206. /// p[1] = (val >> 8) & 0xFF;
  3207. /// p[2] = (val >> 16) & 0xFF;
  3208. /// p[3] = (val >> 24) & 0xFF;
  3209. /// =>
  3210. /// *((i32)p) = val;
  3211. ///
  3212. /// i8 *p = ...
  3213. /// i32 val = ...
  3214. /// p[0] = (val >> 24) & 0xFF;
  3215. /// p[1] = (val >> 16) & 0xFF;
  3216. /// p[2] = (val >> 8) & 0xFF;
  3217. /// p[3] = (val >> 0) & 0xFF;
  3218. /// =>
  3219. /// *((i32)p) = BSWAP(val);
  3220. bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
  3221. MergeTruncStoresInfo &MatchInfo) {
  3222. auto &StoreMI = cast<GStore>(MI);
  3223. LLT MemTy = StoreMI.getMMO().getMemoryType();
  3224. // We only handle merging simple stores of 1-4 bytes.
  3225. if (!MemTy.isScalar())
  3226. return false;
  3227. switch (MemTy.getSizeInBits()) {
  3228. case 8:
  3229. case 16:
  3230. case 32:
  3231. break;
  3232. default:
  3233. return false;
  3234. }
  3235. if (!StoreMI.isSimple())
  3236. return false;
  3237. // We do a simple search for mergeable stores prior to this one.
  3238. // Any potential alias hazard along the way terminates the search.
  3239. SmallVector<GStore *> FoundStores;
  3240. // We're looking for:
  3241. // 1) a (store(trunc(...)))
  3242. // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
  3243. // the partial value stored.
  3244. // 3) where the offsets form either a little or big-endian sequence.
  3245. auto &LastStore = StoreMI;
  3246. // The single base pointer that all stores must use.
  3247. Register BaseReg;
  3248. int64_t LastOffset;
  3249. if (!mi_match(LastStore.getPointerReg(), MRI,
  3250. m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
  3251. BaseReg = LastStore.getPointerReg();
  3252. LastOffset = 0;
  3253. }
  3254. GStore *LowestIdxStore = &LastStore;
  3255. int64_t LowestIdxOffset = LastOffset;
  3256. Register WideSrcVal;
  3257. auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
  3258. if (!LowestShiftAmt)
  3259. return false; // Didn't match a trunc.
  3260. assert(WideSrcVal.isValid());
  3261. LLT WideStoreTy = MRI.getType(WideSrcVal);
  3262. // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
  3263. if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
  3264. return false;
  3265. const unsigned NumStoresRequired =
  3266. WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
  3267. SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
  3268. OffsetMap[*LowestShiftAmt] = LastOffset;
  3269. FoundStores.emplace_back(&LastStore);
  3270. // Search the block up for more stores.
  3271. // We use a search threshold of 10 instructions here because the combiner
  3272. // works top-down within a block, and we don't want to search an unbounded
  3273. // number of predecessor instructions trying to find matching stores.
  3274. // If we moved this optimization into a separate pass then we could probably
  3275. // use a more efficient search without having a hard-coded threshold.
  3276. const int MaxInstsToCheck = 10;
  3277. int NumInstsChecked = 0;
  3278. for (auto II = ++LastStore.getReverseIterator();
  3279. II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
  3280. ++II) {
  3281. NumInstsChecked++;
  3282. GStore *NewStore;
  3283. if ((NewStore = dyn_cast<GStore>(&*II))) {
  3284. if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
  3285. break;
  3286. } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
  3287. break;
  3288. } else {
  3289. continue; // This is a safe instruction we can look past.
  3290. }
  3291. Register NewBaseReg;
  3292. int64_t MemOffset;
  3293. // Check we're storing to the same base + some offset.
  3294. if (!mi_match(NewStore->getPointerReg(), MRI,
  3295. m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
  3296. NewBaseReg = NewStore->getPointerReg();
  3297. MemOffset = 0;
  3298. }
  3299. if (BaseReg != NewBaseReg)
  3300. break;
  3301. auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
  3302. if (!ShiftByteOffset)
  3303. break;
  3304. if (MemOffset < LowestIdxOffset) {
  3305. LowestIdxOffset = MemOffset;
  3306. LowestIdxStore = NewStore;
  3307. }
    // Map the offset in the store and the offset in the combined value, and
    // stop searching if this position has been seen before.
  3310. if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
  3311. OffsetMap[*ShiftByteOffset] != INT64_MAX)
  3312. break;
  3313. OffsetMap[*ShiftByteOffset] = MemOffset;
  3314. FoundStores.emplace_back(NewStore);
  3315. // Reset counter since we've found a matching inst.
  3316. NumInstsChecked = 0;
  3317. if (FoundStores.size() == NumStoresRequired)
  3318. break;
  3319. }
  3320. if (FoundStores.size() != NumStoresRequired) {
  3321. return false;
  3322. }
  3323. const auto &DL = LastStore.getMF()->getDataLayout();
  3324. auto &C = LastStore.getMF()->getFunction().getContext();
  3325. // Check that a store of the wide type is both allowed and fast on the target
  3326. unsigned Fast = 0;
  3327. bool Allowed = getTargetLowering().allowsMemoryAccess(
  3328. C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
  3329. if (!Allowed || !Fast)
  3330. return false;
  3331. // Check if the pieces of the value are going to the expected places in memory
  3332. // to merge the stores.
  3333. unsigned NarrowBits = MemTy.getScalarSizeInBits();
  3334. auto checkOffsets = [&](bool MatchLittleEndian) {
  3335. if (MatchLittleEndian) {
  3336. for (unsigned i = 0; i != NumStoresRequired; ++i)
  3337. if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
  3338. return false;
  3339. } else { // MatchBigEndian by reversing loop counter.
  3340. for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
  3341. ++i, --j)
  3342. if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
  3343. return false;
  3344. }
  3345. return true;
  3346. };
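// Illustrative offset maps for a 4-byte wide value (entries are memory offsets
// relative to LowestIdxOffset, indexed by byte position within the value):
//   little-endian layout: OffsetMap = {0, 1, 2, 3}
//   big-endian layout:    OffsetMap = {3, 2, 1, 0}
// If the stores match the layout opposite to the target's data layout, the
// merge is still possible by byte-swapping the wide value first (or, for
// exactly two pieces, rotating it by half its width).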
  3347. // Check if the offsets line up for the native data layout of this target.
  3348. bool NeedBswap = false;
  3349. bool NeedRotate = false;
  3350. if (!checkOffsets(DL.isLittleEndian())) {
  3351. // Special-case: check if byte offsets line up for the opposite endian.
  3352. if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
  3353. NeedBswap = true;
  3354. else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
  3355. NeedRotate = true;
  3356. else
  3357. return false;
  3358. }
  3359. if (NeedBswap &&
  3360. !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
  3361. return false;
  3362. if (NeedRotate &&
  3363. !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
  3364. return false;
  3365. MatchInfo.NeedBSwap = NeedBswap;
  3366. MatchInfo.NeedRotate = NeedRotate;
  3367. MatchInfo.LowestIdxStore = LowestIdxStore;
  3368. MatchInfo.WideSrcVal = WideSrcVal;
  3369. MatchInfo.FoundStores = std::move(FoundStores);
  3370. return true;
  3371. }
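// Build the single wide store for a successful match: byte-swap or rotate the
// wide value if the offsets matched the opposite endianness, store it through
// the lowest-offset pointer, and erase the original narrow stores.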
  3372. void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
  3373. MergeTruncStoresInfo &MatchInfo) {
  3374. Builder.setInstrAndDebugLoc(MI);
  3375. Register WideSrcVal = MatchInfo.WideSrcVal;
  3376. LLT WideStoreTy = MRI.getType(WideSrcVal);
  3377. if (MatchInfo.NeedBSwap) {
  3378. WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
  3379. } else if (MatchInfo.NeedRotate) {
  3380. assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
  3381. "Unexpected type for rotate");
  3382. auto RotAmt =
  3383. Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
  3384. WideSrcVal =
  3385. Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
  3386. }
  3387. Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
  3388. MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
  3389. MatchInfo.LowestIdxStore->getMMO().getAlign());
  3390. // Erase the old stores.
  3391. for (auto *ST : MatchInfo.FoundStores)
  3392. ST->eraseFromParent();
  3393. }
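// Illustrative example of the extend-through-phis transform (MIR sketch, not
// from the source):
//   bb.2:  %phi:_(s16) = G_PHI %a(s16), %bb.0, %b(s16), %bb.1
//          %ext:_(s32) = G_SEXT %phi
// becomes
//   bb.0:  %a.ext:_(s32) = G_SEXT %a
//   bb.1:  %b.ext:_(s32) = G_SEXT %b
//   bb.2:  %ext:_(s32) = G_PHI %a.ext, %bb.0, %b.ext, %bb.1
// so the extension is performed on the incoming values instead of the phi
// result.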
  3394. bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
  3395. MachineInstr *&ExtMI) {
  3396. assert(MI.getOpcode() == TargetOpcode::G_PHI);
  3397. Register DstReg = MI.getOperand(0).getReg();
  3398. // TODO: Extending a vector may be expensive, don't do this until heuristics
  3399. // are better.
  3400. if (MRI.getType(DstReg).isVector())
  3401. return false;
  3402. // Try to match a phi, whose only use is an extend.
  3403. if (!MRI.hasOneNonDBGUse(DstReg))
  3404. return false;
  3405. ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
  3406. switch (ExtMI->getOpcode()) {
  3407. case TargetOpcode::G_ANYEXT:
  3408. return true; // G_ANYEXT is usually free.
  3409. case TargetOpcode::G_ZEXT:
  3410. case TargetOpcode::G_SEXT:
  3411. break;
  3412. default:
  3413. return false;
  3414. }
  3415. // If the target is likely to fold this extend away, don't propagate.
  3416. if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
  3417. return false;
  3418. // We don't want to propagate the extends unless there's a good chance that
  3419. // they'll be optimized in some way.
  3420. // Collect the unique incoming values.
  3421. SmallPtrSet<MachineInstr *, 4> InSrcs;
  3422. for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
  3423. auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
  3424. switch (DefMI->getOpcode()) {
  3425. case TargetOpcode::G_LOAD:
  3426. case TargetOpcode::G_TRUNC:
  3427. case TargetOpcode::G_SEXT:
  3428. case TargetOpcode::G_ZEXT:
  3429. case TargetOpcode::G_ANYEXT:
  3430. case TargetOpcode::G_CONSTANT:
  3431. InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
3432. // Don't try to propagate if there are too many places to create new
3433. // extends; chances are it'll increase code size.
  3434. if (InSrcs.size() > 2)
  3435. return false;
  3436. break;
  3437. default:
  3438. return false;
  3439. }
  3440. }
  3441. return true;
  3442. }
  3443. void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
  3444. MachineInstr *&ExtMI) {
  3445. assert(MI.getOpcode() == TargetOpcode::G_PHI);
  3446. Register DstReg = ExtMI->getOperand(0).getReg();
  3447. LLT ExtTy = MRI.getType(DstReg);
3449. // Propagate the extension into each incoming register's defining block.
  3449. // Use a SetVector here because PHIs can have duplicate edges, and we want
  3450. // deterministic iteration order.
  3451. SmallSetVector<MachineInstr *, 8> SrcMIs;
  3452. SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
  3453. for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
  3454. auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
  3455. if (!SrcMIs.insert(SrcMI))
  3456. continue;
  3457. // Build an extend after each src inst.
  3458. auto *MBB = SrcMI->getParent();
  3459. MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
  3460. if (InsertPt != MBB->end() && InsertPt->isPHI())
  3461. InsertPt = MBB->getFirstNonPHI();
  3462. Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
  3463. Builder.setDebugLoc(MI.getDebugLoc());
  3464. auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
  3465. SrcMI->getOperand(0).getReg());
  3466. OldToNewSrcMap[SrcMI] = NewExt;
  3467. }
  3468. // Create a new phi with the extended inputs.
  3469. Builder.setInstrAndDebugLoc(MI);
  3470. auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
  3471. NewPhi.addDef(DstReg);
  3472. for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
  3473. if (!MO.isReg()) {
  3474. NewPhi.addMBB(MO.getMBB());
  3475. continue;
  3476. }
  3477. auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
  3478. NewPhi.addUse(NewSrc->getOperand(0).getReg());
  3479. }
  3480. Builder.insertInstr(NewPhi);
  3481. ExtMI->eraseFromParent();
  3482. }
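// Illustrative example: extracting a known lane of a build_vector just
// forwards the corresponding scalar source.
//   %v:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d
//   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v, 2
// can be replaced by %c (possibly via a trunc if the source came from a
// G_BUILD_VECTOR_TRUNC).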
  3483. bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
  3484. Register &Reg) {
  3485. assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
  3486. // If we have a constant index, look for a G_BUILD_VECTOR source
  3487. // and find the source register that the index maps to.
  3488. Register SrcVec = MI.getOperand(1).getReg();
  3489. LLT SrcTy = MRI.getType(SrcVec);
  3490. auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  3491. if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
  3492. return false;
  3493. unsigned VecIdx = Cst->Value.getZExtValue();
  3494. // Check if we have a build_vector or build_vector_trunc with an optional
  3495. // trunc in front.
  3496. MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
  3497. if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
  3498. SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
  3499. }
  3500. if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
  3501. SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
  3502. return false;
  3503. EVT Ty(getMVTForLLT(SrcTy));
  3504. if (!MRI.hasOneNonDBGUse(SrcVec) &&
  3505. !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
  3506. return false;
  3507. Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
  3508. return true;
  3509. }
  3510. void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
  3511. Register &Reg) {
  3512. // Check the type of the register, since it may have come from a
  3513. // G_BUILD_VECTOR_TRUNC.
  3514. LLT ScalarTy = MRI.getType(Reg);
  3515. Register DstReg = MI.getOperand(0).getReg();
  3516. LLT DstTy = MRI.getType(DstReg);
  3517. Builder.setInstrAndDebugLoc(MI);
  3518. if (ScalarTy != DstTy) {
  3519. assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
  3520. Builder.buildTrunc(DstReg, Reg);
  3521. MI.eraseFromParent();
  3522. return;
  3523. }
  3524. replaceSingleDefInstWithReg(MI, Reg);
  3525. }
  3526. bool CombinerHelper::matchExtractAllEltsFromBuildVector(
  3527. MachineInstr &MI,
  3528. SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  3529. assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  3530. // This combine tries to find build_vector's which have every source element
3531. // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
3532. // masked-load scalarization are run late in the pipeline. There's already
  3533. // a combine for a similar pattern starting from the extract, but that
  3534. // doesn't attempt to do it if there are multiple uses of the build_vector,
  3535. // which in this case is true. Starting the combine from the build_vector
  3536. // feels more natural than trying to find sibling nodes of extracts.
  3537. // E.g.
  3538. // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
  3539. // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
  3540. // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
  3541. // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
  3542. // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
  3543. // ==>
  3544. // replace ext{1,2,3,4} with %s{1,2,3,4}
  3545. Register DstReg = MI.getOperand(0).getReg();
  3546. LLT DstTy = MRI.getType(DstReg);
  3547. unsigned NumElts = DstTy.getNumElements();
  3548. SmallBitVector ExtractedElts(NumElts);
  3549. for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
  3550. if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
  3551. return false;
  3552. auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
  3553. if (!Cst)
  3554. return false;
  3555. unsigned Idx = Cst->getZExtValue();
  3556. if (Idx >= NumElts)
  3557. return false; // Out of range.
  3558. ExtractedElts.set(Idx);
  3559. SrcDstPairs.emplace_back(
  3560. std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
  3561. }
  3562. // Match if every element was extracted.
  3563. return ExtractedElts.all();
  3564. }
  3565. void CombinerHelper::applyExtractAllEltsFromBuildVector(
  3566. MachineInstr &MI,
  3567. SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
  3568. assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  3569. for (auto &Pair : SrcDstPairs) {
  3570. auto *ExtMI = Pair.second;
  3571. replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
  3572. ExtMI->eraseFromParent();
  3573. }
  3574. MI.eraseFromParent();
  3575. }
  3576. void CombinerHelper::applyBuildFn(
  3577. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  3578. Builder.setInstrAndDebugLoc(MI);
  3579. MatchInfo(Builder);
  3580. MI.eraseFromParent();
  3581. }
  3582. void CombinerHelper::applyBuildFnNoErase(
  3583. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  3584. Builder.setInstrAndDebugLoc(MI);
  3585. MatchInfo(Builder);
  3586. }
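// Illustrative example of the or(shl, lshr) -> funnel-shift match on s32
// (shift amounts chosen for illustration):
//   %hi = G_SHL %x, 8
//   %lo = G_LSHR %y, 24
//   %or = G_OR %hi, %lo        ; 8 + 24 == bit width
// becomes %or = G_FSHR %x, %y, 24 (equivalently G_FSHL %x, %y, 8).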
  3587. bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
  3588. BuildFnTy &MatchInfo) {
  3589. assert(MI.getOpcode() == TargetOpcode::G_OR);
  3590. Register Dst = MI.getOperand(0).getReg();
  3591. LLT Ty = MRI.getType(Dst);
  3592. unsigned BitWidth = Ty.getScalarSizeInBits();
  3593. Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
  3594. unsigned FshOpc = 0;
  3595. // Match (or (shl ...), (lshr ...)).
  3596. if (!mi_match(Dst, MRI,
  3597. // m_GOr() handles the commuted version as well.
  3598. m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
  3599. m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
  3600. return false;
  3601. // Given constants C0 and C1 such that C0 + C1 is bit-width:
  3602. // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
  3603. int64_t CstShlAmt, CstLShrAmt;
  3604. if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
  3605. mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
  3606. CstShlAmt + CstLShrAmt == BitWidth) {
  3607. FshOpc = TargetOpcode::G_FSHR;
  3608. Amt = LShrAmt;
  3609. } else if (mi_match(LShrAmt, MRI,
  3610. m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
  3611. ShlAmt == Amt) {
  3612. // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
  3613. FshOpc = TargetOpcode::G_FSHL;
  3614. } else if (mi_match(ShlAmt, MRI,
  3615. m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
  3616. LShrAmt == Amt) {
  3617. // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
  3618. FshOpc = TargetOpcode::G_FSHR;
  3619. } else {
  3620. return false;
  3621. }
  3622. LLT AmtTy = MRI.getType(Amt);
  3623. if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
  3624. return false;
  3625. MatchInfo = [=](MachineIRBuilder &B) {
  3626. B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
  3627. };
  3628. return true;
  3629. }
  3630. /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
  3631. bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
  3632. unsigned Opc = MI.getOpcode();
  3633. assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
  3634. Register X = MI.getOperand(1).getReg();
  3635. Register Y = MI.getOperand(2).getReg();
  3636. if (X != Y)
  3637. return false;
  3638. unsigned RotateOpc =
  3639. Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
  3640. return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
  3641. }
  3642. void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
  3643. unsigned Opc = MI.getOpcode();
  3644. assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
  3645. bool IsFSHL = Opc == TargetOpcode::G_FSHL;
  3646. Observer.changingInstr(MI);
  3647. MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
  3648. : TargetOpcode::G_ROTR));
  3649. MI.removeOperand(2);
  3650. Observer.changedInstr(MI);
  3651. }
  3652. // Fold (rot x, c) -> (rot x, c % BitSize)
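// e.g. on s32: (G_ROTL %x, 40) -> (G_ROTL %x, 8), since rotating by the full
// bit width is a no-op.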
  3653. bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
  3654. assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
  3655. MI.getOpcode() == TargetOpcode::G_ROTR);
  3656. unsigned Bitsize =
  3657. MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
  3658. Register AmtReg = MI.getOperand(2).getReg();
  3659. bool OutOfRange = false;
  3660. auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
  3661. if (auto *CI = dyn_cast<ConstantInt>(C))
  3662. OutOfRange |= CI->getValue().uge(Bitsize);
  3663. return true;
  3664. };
  3665. return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
  3666. }
  3667. void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
  3668. assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
  3669. MI.getOpcode() == TargetOpcode::G_ROTR);
  3670. unsigned Bitsize =
  3671. MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
  3672. Builder.setInstrAndDebugLoc(MI);
  3673. Register Amt = MI.getOperand(2).getReg();
  3674. LLT AmtTy = MRI.getType(Amt);
  3675. auto Bits = Builder.buildConstant(AmtTy, Bitsize);
  3676. Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
  3677. Observer.changingInstr(MI);
  3678. MI.getOperand(2).setReg(Amt);
  3679. Observer.changedInstr(MI);
  3680. }
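// Illustrative example: if known bits show %a is at most 255 and %b is at
// least 256, then (G_ICMP ult %a, %b) is known true and can be replaced by the
// target's "true" constant.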
  3681. bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
  3682. int64_t &MatchInfo) {
  3683. assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  3684. auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  3685. auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
  3686. auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
  3687. std::optional<bool> KnownVal;
  3688. switch (Pred) {
  3689. default:
  3690. llvm_unreachable("Unexpected G_ICMP predicate?");
  3691. case CmpInst::ICMP_EQ:
  3692. KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
  3693. break;
  3694. case CmpInst::ICMP_NE:
  3695. KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
  3696. break;
  3697. case CmpInst::ICMP_SGE:
  3698. KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
  3699. break;
  3700. case CmpInst::ICMP_SGT:
  3701. KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
  3702. break;
  3703. case CmpInst::ICMP_SLE:
  3704. KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
  3705. break;
  3706. case CmpInst::ICMP_SLT:
  3707. KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
  3708. break;
  3709. case CmpInst::ICMP_UGE:
  3710. KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
  3711. break;
  3712. case CmpInst::ICMP_UGT:
  3713. KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
  3714. break;
  3715. case CmpInst::ICMP_ULE:
  3716. KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
  3717. break;
  3718. case CmpInst::ICMP_ULT:
  3719. KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
  3720. break;
  3721. }
  3722. if (!KnownVal)
  3723. return false;
  3724. MatchInfo =
  3725. *KnownVal
  3726. ? getICmpTrueVal(getTargetLowering(),
  3727. /*IsVector = */
  3728. MRI.getType(MI.getOperand(0).getReg()).isVector(),
  3729. /* IsFP = */ false)
  3730. : 0;
  3731. return true;
  3732. }
  3733. bool CombinerHelper::matchICmpToLHSKnownBits(
  3734. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  3735. assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  3736. // Given:
  3737. //
  3738. // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
  3739. // %cmp = G_ICMP ne %x, 0
  3740. //
  3741. // Or:
  3742. //
  3743. // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
  3744. // %cmp = G_ICMP eq %x, 1
  3745. //
  3746. // We can replace %cmp with %x assuming true is 1 on the target.
  3747. auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  3748. if (!CmpInst::isEquality(Pred))
  3749. return false;
  3750. Register Dst = MI.getOperand(0).getReg();
  3751. LLT DstTy = MRI.getType(Dst);
  3752. if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
  3753. /* IsFP = */ false) != 1)
  3754. return false;
  3755. int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
  3756. if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
  3757. return false;
  3758. Register LHS = MI.getOperand(2).getReg();
  3759. auto KnownLHS = KB->getKnownBits(LHS);
  3760. if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
  3761. return false;
  3762. // Make sure replacing Dst with the LHS is a legal operation.
  3763. LLT LHSTy = MRI.getType(LHS);
  3764. unsigned LHSSize = LHSTy.getSizeInBits();
  3765. unsigned DstSize = DstTy.getSizeInBits();
  3766. unsigned Op = TargetOpcode::COPY;
  3767. if (DstSize != LHSSize)
  3768. Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
  3769. if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
  3770. return false;
  3771. MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
  3772. return true;
  3773. }
  3774. // Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
  3775. bool CombinerHelper::matchAndOrDisjointMask(
  3776. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  3777. assert(MI.getOpcode() == TargetOpcode::G_AND);
  3778. // Ignore vector types to simplify matching the two constants.
  3779. // TODO: do this for vectors and scalars via a demanded bits analysis.
  3780. LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  3781. if (Ty.isVector())
  3782. return false;
  3783. Register Src;
  3784. Register AndMaskReg;
  3785. int64_t AndMaskBits;
  3786. int64_t OrMaskBits;
  3787. if (!mi_match(MI, MRI,
  3788. m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
  3789. m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
  3790. return false;
  3791. // Check if OrMask could turn on any bits in Src.
  3792. if (AndMaskBits & OrMaskBits)
  3793. return false;
  3794. MatchInfo = [=, &MI](MachineIRBuilder &B) {
  3795. Observer.changingInstr(MI);
  3796. // Canonicalize the result to have the constant on the RHS.
  3797. if (MI.getOperand(1).getReg() == AndMaskReg)
  3798. MI.getOperand(2).setReg(AndMaskReg);
  3799. MI.getOperand(1).setReg(Src);
  3800. Observer.changedInstr(MI);
  3801. };
  3802. return true;
  3803. }
  3804. /// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
  3805. bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
  3806. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  3807. assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
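// e.g. on s32: (G_SEXT_INREG (G_LSHR %x, 4), 8) -> (G_SBFX %x, 4, 8), i.e. a
// sign-extended extract of 8 bits starting at bit 4.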
  3808. Register Dst = MI.getOperand(0).getReg();
  3809. Register Src = MI.getOperand(1).getReg();
  3810. LLT Ty = MRI.getType(Src);
  3811. LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  3812. if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
  3813. return false;
  3814. int64_t Width = MI.getOperand(2).getImm();
  3815. Register ShiftSrc;
  3816. int64_t ShiftImm;
  3817. if (!mi_match(
  3818. Src, MRI,
  3819. m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
  3820. m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
  3821. return false;
  3822. if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
  3823. return false;
  3824. MatchInfo = [=](MachineIRBuilder &B) {
  3825. auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
  3826. auto Cst2 = B.buildConstant(ExtractTy, Width);
  3827. B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
  3828. };
  3829. return true;
  3830. }
  3831. /// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
  3832. bool CombinerHelper::matchBitfieldExtractFromAnd(
  3833. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  3834. assert(MI.getOpcode() == TargetOpcode::G_AND);
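// e.g. on s32: (G_AND (G_LSHR %x, 4), 0xff) -> (G_UBFX %x, 4, 8): the mask
// selects the low 8 bits of the shifted value, so this is an unsigned extract
// of 8 bits at bit 4.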
  3835. Register Dst = MI.getOperand(0).getReg();
  3836. LLT Ty = MRI.getType(Dst);
  3837. LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  3838. if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
  3839. TargetOpcode::G_UBFX, Ty, ExtractTy))
  3840. return false;
  3841. int64_t AndImm, LSBImm;
  3842. Register ShiftSrc;
  3843. const unsigned Size = Ty.getScalarSizeInBits();
  3844. if (!mi_match(MI.getOperand(0).getReg(), MRI,
  3845. m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
  3846. m_ICst(AndImm))))
  3847. return false;
  3848. // The mask is a mask of the low bits iff imm & (imm+1) == 0.
  3849. auto MaybeMask = static_cast<uint64_t>(AndImm);
  3850. if (MaybeMask & (MaybeMask + 1))
  3851. return false;
  3852. // LSB must fit within the register.
  3853. if (static_cast<uint64_t>(LSBImm) >= Size)
  3854. return false;
  3855. uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
  3856. MatchInfo = [=](MachineIRBuilder &B) {
  3857. auto WidthCst = B.buildConstant(ExtractTy, Width);
  3858. auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
  3859. B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
  3860. };
  3861. return true;
  3862. }
  3863. bool CombinerHelper::matchBitfieldExtractFromShr(
  3864. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  3865. const unsigned Opcode = MI.getOpcode();
  3866. assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
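// e.g. on s32: (G_ASHR (G_SHL %x, 24), 28) -> (G_SBFX %x, 4, 4): bits [7:4] of
// %x, sign-extended. With G_LSHR the result is a G_UBFX instead.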
  3867. const Register Dst = MI.getOperand(0).getReg();
  3868. const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
  3869. ? TargetOpcode::G_SBFX
  3870. : TargetOpcode::G_UBFX;
  3871. // Check if the type we would use for the extract is legal
  3872. LLT Ty = MRI.getType(Dst);
  3873. LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  3874. if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
  3875. return false;
  3876. Register ShlSrc;
  3877. int64_t ShrAmt;
  3878. int64_t ShlAmt;
  3879. const unsigned Size = Ty.getScalarSizeInBits();
  3880. // Try to match shr (shl x, c1), c2
  3881. if (!mi_match(Dst, MRI,
  3882. m_BinOp(Opcode,
  3883. m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
  3884. m_ICst(ShrAmt))))
  3885. return false;
  3886. // Make sure that the shift sizes can fit a bitfield extract
  3887. if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
  3888. return false;
  3889. // Skip this combine if the G_SEXT_INREG combine could handle it
  3890. if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
  3891. return false;
  3892. // Calculate start position and width of the extract
  3893. const int64_t Pos = ShrAmt - ShlAmt;
  3894. const int64_t Width = Size - ShrAmt;
  3895. MatchInfo = [=](MachineIRBuilder &B) {
  3896. auto WidthCst = B.buildConstant(ExtractTy, Width);
  3897. auto PosCst = B.buildConstant(ExtractTy, Pos);
  3898. B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
  3899. };
  3900. return true;
  3901. }
  3902. bool CombinerHelper::matchBitfieldExtractFromShrAnd(
  3903. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  3904. const unsigned Opcode = MI.getOpcode();
  3905. assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
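// e.g. on s32: (G_LSHR (G_AND %x, 0x3f0), 4) -> (G_UBFX %x, 4, 6): the mask
// and shift together select bits [9:4] of %x with no holes.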
  3906. const Register Dst = MI.getOperand(0).getReg();
  3907. LLT Ty = MRI.getType(Dst);
  3908. LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  3909. if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
  3910. TargetOpcode::G_UBFX, Ty, ExtractTy))
  3911. return false;
  3912. // Try to match shr (and x, c1), c2
  3913. Register AndSrc;
  3914. int64_t ShrAmt;
  3915. int64_t SMask;
  3916. if (!mi_match(Dst, MRI,
  3917. m_BinOp(Opcode,
  3918. m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
  3919. m_ICst(ShrAmt))))
  3920. return false;
  3921. const unsigned Size = Ty.getScalarSizeInBits();
  3922. if (ShrAmt < 0 || ShrAmt >= Size)
  3923. return false;
  3924. // If the shift subsumes the mask, emit the 0 directly.
  3925. if (0 == (SMask >> ShrAmt)) {
  3926. MatchInfo = [=](MachineIRBuilder &B) {
  3927. B.buildConstant(Dst, 0);
  3928. };
  3929. return true;
  3930. }
  3931. // Check that ubfx can do the extraction, with no holes in the mask.
  3932. uint64_t UMask = SMask;
  3933. UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
  3934. UMask &= maskTrailingOnes<uint64_t>(Size);
  3935. if (!isMask_64(UMask))
  3936. return false;
  3937. // Calculate start position and width of the extract.
  3938. const int64_t Pos = ShrAmt;
  3939. const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
  3940. // It's preferable to keep the shift, rather than form G_SBFX.
  3941. // TODO: remove the G_AND via demanded bits analysis.
  3942. if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
  3943. return false;
  3944. MatchInfo = [=](MachineIRBuilder &B) {
  3945. auto WidthCst = B.buildConstant(ExtractTy, Width);
  3946. auto PosCst = B.buildConstant(ExtractTy, Pos);
  3947. B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
  3948. };
  3949. return true;
  3950. }
  3951. bool CombinerHelper::reassociationCanBreakAddressingModePattern(
  3952. MachineInstr &PtrAdd) {
  3953. assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
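// Sketch of the hazard this check guards against: folding C1 + C2 into a
// single offset is a pessimization if C2 on its own fits a user load/store's
// addressing mode but the combined C1 + C2 does not, because the larger offset
// would then have to be materialized separately.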
  3954. Register Src1Reg = PtrAdd.getOperand(1).getReg();
  3955. MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
  3956. if (!Src1Def)
  3957. return false;
  3958. Register Src2Reg = PtrAdd.getOperand(2).getReg();
  3959. if (MRI.hasOneNonDBGUse(Src1Reg))
  3960. return false;
  3961. auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
  3962. if (!C1)
  3963. return false;
  3964. auto C2 = getIConstantVRegVal(Src2Reg, MRI);
  3965. if (!C2)
  3966. return false;
  3967. const APInt &C1APIntVal = *C1;
  3968. const APInt &C2APIntVal = *C2;
  3969. const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
  3970. for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
  3971. // This combine may end up running before ptrtoint/inttoptr combines
  3972. // manage to eliminate redundant conversions, so try to look through them.
  3973. MachineInstr *ConvUseMI = &UseMI;
  3974. unsigned ConvUseOpc = ConvUseMI->getOpcode();
  3975. while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
  3976. ConvUseOpc == TargetOpcode::G_PTRTOINT) {
  3977. Register DefReg = ConvUseMI->getOperand(0).getReg();
  3978. if (!MRI.hasOneNonDBGUse(DefReg))
  3979. break;
  3980. ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
  3981. ConvUseOpc = ConvUseMI->getOpcode();
  3982. }
  3983. auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
  3984. ConvUseOpc == TargetOpcode::G_STORE;
  3985. if (!LoadStore)
  3986. continue;
  3987. // Is x[offset2] already not a legal addressing mode? If so then
  3988. // reassociating the constants breaks nothing (we test offset2 because
  3989. // that's the one we hope to fold into the load or store).
  3990. TargetLoweringBase::AddrMode AM;
  3991. AM.HasBaseReg = true;
  3992. AM.BaseOffs = C2APIntVal.getSExtValue();
  3993. unsigned AS =
  3994. MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
  3995. Type *AccessTy =
  3996. getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
  3997. PtrAdd.getMF()->getFunction().getContext());
  3998. const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
  3999. if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
  4000. AccessTy, AS))
  4001. continue;
  4002. // Would x[offset1+offset2] still be a legal addressing mode?
  4003. AM.BaseOffs = CombinedValue;
  4004. if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
  4005. AccessTy, AS))
  4006. return true;
  4007. }
  4008. return false;
  4009. }
  4010. bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
  4011. MachineInstr *RHS,
  4012. BuildFnTy &MatchInfo) {
  4013. // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
  4014. Register Src1Reg = MI.getOperand(1).getReg();
  4015. if (RHS->getOpcode() != TargetOpcode::G_ADD)
  4016. return false;
  4017. auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
  4018. if (!C2)
  4019. return false;
  4020. MatchInfo = [=, &MI](MachineIRBuilder &B) {
  4021. LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
  4022. auto NewBase =
  4023. Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
  4024. Observer.changingInstr(MI);
  4025. MI.getOperand(1).setReg(NewBase.getReg(0));
  4026. MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
  4027. Observer.changedInstr(MI);
  4028. };
  4029. return !reassociationCanBreakAddressingModePattern(MI);
  4030. }
  4031. bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
  4032. MachineInstr *LHS,
  4033. MachineInstr *RHS,
  4034. BuildFnTy &MatchInfo) {
4035. // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4036. // if and only if (G_PTR_ADD X, C) has one use.
  4037. Register LHSBase;
  4038. std::optional<ValueAndVReg> LHSCstOff;
  4039. if (!mi_match(MI.getBaseReg(), MRI,
  4040. m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
  4041. return false;
  4042. auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
  4043. MatchInfo = [=, &MI](MachineIRBuilder &B) {
4044. // When we change LHSPtrAdd's offset register we might cause it to use a reg
4045. // before its def. Sink the inner PTR_ADD to just before the outer one to
4046. // ensure this doesn't happen.
  4047. LHSPtrAdd->moveBefore(&MI);
  4048. Register RHSReg = MI.getOffsetReg();
4049. // Rebuild the constant with the offset register's type; reusing its vreg directly could cause a type mismatch if it came through an extend/trunc.
  4050. auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
  4051. Observer.changingInstr(MI);
  4052. MI.getOperand(2).setReg(NewCst.getReg(0));
  4053. Observer.changedInstr(MI);
  4054. Observer.changingInstr(*LHSPtrAdd);
  4055. LHSPtrAdd->getOperand(2).setReg(RHSReg);
  4056. Observer.changedInstr(*LHSPtrAdd);
  4057. };
  4058. return !reassociationCanBreakAddressingModePattern(MI);
  4059. }
  4060. bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
  4061. MachineInstr *LHS,
  4062. MachineInstr *RHS,
  4063. BuildFnTy &MatchInfo) {
  4064. // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
  4065. auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
  4066. if (!LHSPtrAdd)
  4067. return false;
  4068. Register Src2Reg = MI.getOperand(2).getReg();
  4069. Register LHSSrc1 = LHSPtrAdd->getBaseReg();
  4070. Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
  4071. auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
  4072. if (!C1)
  4073. return false;
  4074. auto C2 = getIConstantVRegVal(Src2Reg, MRI);
  4075. if (!C2)
  4076. return false;
  4077. MatchInfo = [=, &MI](MachineIRBuilder &B) {
  4078. auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
  4079. Observer.changingInstr(MI);
  4080. MI.getOperand(1).setReg(LHSSrc1);
  4081. MI.getOperand(2).setReg(NewCst.getReg(0));
  4082. Observer.changedInstr(MI);
  4083. };
  4084. return !reassociationCanBreakAddressingModePattern(MI);
  4085. }
  4086. bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
  4087. BuildFnTy &MatchInfo) {
  4088. auto &PtrAdd = cast<GPtrAdd>(MI);
  4089. // We're trying to match a few pointer computation patterns here for
  4090. // re-association opportunities.
  4091. // 1) Isolating a constant operand to be on the RHS, e.g.:
  4092. // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
  4093. //
  4094. // 2) Folding two constants in each sub-tree as long as such folding
  4095. // doesn't break a legal addressing mode.
  4096. // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
  4097. //
  4098. // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4099. // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4100. // iff (G_PTR_ADD X, C) has one use.
  4101. MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
  4102. MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
  4103. // Try to match example 2.
  4104. if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
  4105. return true;
  4106. // Try to match example 3.
  4107. if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
  4108. return true;
  4109. // Try to match example 1.
  4110. if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
  4111. return true;
  4112. return false;
  4113. }
  4114. bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
  4115. Register Op1 = MI.getOperand(1).getReg();
  4116. Register Op2 = MI.getOperand(2).getReg();
  4117. auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
  4118. if (!MaybeCst)
  4119. return false;
  4120. MatchInfo = *MaybeCst;
  4121. return true;
  4122. }
  4123. bool CombinerHelper::matchNarrowBinopFeedingAnd(
  4124. MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  4125. // Look for a binop feeding into an AND with a mask:
  4126. //
  4127. // %add = G_ADD %lhs, %rhs
  4128. // %and = G_AND %add, 000...11111111
  4129. //
  4130. // Check if it's possible to perform the binop at a narrower width and zext
  4131. // back to the original width like so:
  4132. //
  4133. // %narrow_lhs = G_TRUNC %lhs
  4134. // %narrow_rhs = G_TRUNC %rhs
  4135. // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
  4136. // %new_add = G_ZEXT %narrow_add
  4137. // %and = G_AND %new_add, 000...11111111
  4138. //
  4139. // This can allow later combines to eliminate the G_AND if it turns out
  4140. // that the mask is irrelevant.
  4141. assert(MI.getOpcode() == TargetOpcode::G_AND);
  4142. Register Dst = MI.getOperand(0).getReg();
  4143. Register AndLHS = MI.getOperand(1).getReg();
  4144. Register AndRHS = MI.getOperand(2).getReg();
  4145. LLT WideTy = MRI.getType(Dst);
  4146. // If the potential binop has more than one use, then it's possible that one
  4147. // of those uses will need its full width.
  4148. if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
  4149. return false;
  4150. // Check if the LHS feeding the AND is impacted by the high bits that we're
  4151. // masking out.
  4152. //
  4153. // e.g. for 64-bit x, y:
  4154. //
  4155. // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
  4156. MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
  4157. if (!LHSInst)
  4158. return false;
  4159. unsigned LHSOpc = LHSInst->getOpcode();
  4160. switch (LHSOpc) {
  4161. default:
  4162. return false;
  4163. case TargetOpcode::G_ADD:
  4164. case TargetOpcode::G_SUB:
  4165. case TargetOpcode::G_MUL:
  4166. case TargetOpcode::G_AND:
  4167. case TargetOpcode::G_OR:
  4168. case TargetOpcode::G_XOR:
  4169. break;
  4170. }
  4171. // Find the mask on the RHS.
  4172. auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
  4173. if (!Cst)
  4174. return false;
  4175. auto Mask = Cst->Value;
  4176. if (!Mask.isMask())
  4177. return false;
  4178. // No point in combining if there's nothing to truncate.
  4179. unsigned NarrowWidth = Mask.countTrailingOnes();
  4180. if (NarrowWidth == WideTy.getSizeInBits())
  4181. return false;
  4182. LLT NarrowTy = LLT::scalar(NarrowWidth);
  4183. // Check if adding the zext + truncates could be harmful.
  4184. auto &MF = *MI.getMF();
  4185. const auto &TLI = getTargetLowering();
  4186. LLVMContext &Ctx = MF.getFunction().getContext();
  4187. auto &DL = MF.getDataLayout();
  4188. if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
  4189. !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
  4190. return false;
  4191. if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
  4192. !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
  4193. return false;
  4194. Register BinOpLHS = LHSInst->getOperand(1).getReg();
  4195. Register BinOpRHS = LHSInst->getOperand(2).getReg();
  4196. MatchInfo = [=, &MI](MachineIRBuilder &B) {
  4197. auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
  4198. auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
  4199. auto NarrowBinOp =
  4200. Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
  4201. auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
  4202. Observer.changingInstr(MI);
  4203. MI.getOperand(1).setReg(Ext.getReg(0));
  4204. Observer.changedInstr(MI);
  4205. };
  4206. return true;
  4207. }
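// (G_*MULO x, 2) can be rewritten as (G_*ADDO x, x): doubling overflows
// exactly when adding x to itself does, for both the signed and unsigned case.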
  4208. bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
  4209. unsigned Opc = MI.getOpcode();
  4210. assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
  4211. if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
  4212. return false;
  4213. MatchInfo = [=, &MI](MachineIRBuilder &B) {
  4214. Observer.changingInstr(MI);
  4215. unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
  4216. : TargetOpcode::G_SADDO;
  4217. MI.setDesc(Builder.getTII().get(NewOpc));
  4218. MI.getOperand(3).setReg(MI.getOperand(2).getReg());
  4219. Observer.changedInstr(MI);
  4220. };
  4221. return true;
  4222. }
  4223. bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
  4224. // (G_*MULO x, 0) -> 0 + no carry out
  4225. assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
  4226. MI.getOpcode() == TargetOpcode::G_SMULO);
  4227. if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
  4228. return false;
  4229. Register Dst = MI.getOperand(0).getReg();
  4230. Register Carry = MI.getOperand(1).getReg();
  4231. if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
  4232. !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
  4233. return false;
  4234. MatchInfo = [=](MachineIRBuilder &B) {
  4235. B.buildConstant(Dst, 0);
  4236. B.buildConstant(Carry, 0);
  4237. };
  4238. return true;
  4239. }
  4240. bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
  4241. // (G_*ADDO x, 0) -> x + no carry out
  4242. assert(MI.getOpcode() == TargetOpcode::G_UADDO ||
  4243. MI.getOpcode() == TargetOpcode::G_SADDO);
  4244. if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
  4245. return false;
  4246. Register Carry = MI.getOperand(1).getReg();
  4247. if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
  4248. return false;
  4249. Register Dst = MI.getOperand(0).getReg();
  4250. Register LHS = MI.getOperand(2).getReg();
  4251. MatchInfo = [=](MachineIRBuilder &B) {
  4252. B.buildCopy(Dst, LHS);
  4253. B.buildConstant(Carry, 0);
  4254. };
  4255. return true;
  4256. }
  4257. bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
  4258. // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
  4259. // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
  4260. assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
  4261. MI.getOpcode() == TargetOpcode::G_SADDE ||
  4262. MI.getOpcode() == TargetOpcode::G_USUBE ||
  4263. MI.getOpcode() == TargetOpcode::G_SSUBE);
  4264. if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
  4265. return false;
  4266. MatchInfo = [&](MachineIRBuilder &B) {
  4267. unsigned NewOpcode;
  4268. switch (MI.getOpcode()) {
  4269. case TargetOpcode::G_UADDE:
  4270. NewOpcode = TargetOpcode::G_UADDO;
  4271. break;
  4272. case TargetOpcode::G_SADDE:
  4273. NewOpcode = TargetOpcode::G_SADDO;
  4274. break;
  4275. case TargetOpcode::G_USUBE:
  4276. NewOpcode = TargetOpcode::G_USUBO;
  4277. break;
  4278. case TargetOpcode::G_SSUBE:
  4279. NewOpcode = TargetOpcode::G_SSUBO;
  4280. break;
  4281. }
  4282. Observer.changingInstr(MI);
  4283. MI.setDesc(B.getTII().get(NewOpcode));
  4284. MI.removeOperand(4);
  4285. Observer.changedInstr(MI);
  4286. };
  4287. return true;
  4288. }
  4289. bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
  4290. BuildFnTy &MatchInfo) {
  4291. assert(MI.getOpcode() == TargetOpcode::G_SUB);
  4292. Register Dst = MI.getOperand(0).getReg();
  4293. // (x + y) - z -> x (if y == z)
  4294. // (x + y) - z -> y (if x == z)
  4295. Register X, Y, Z;
  4296. if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
  4297. Register ReplaceReg;
  4298. int64_t CstX, CstY;
  4299. if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
  4300. mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
  4301. ReplaceReg = X;
  4302. else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
  4303. mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
  4304. ReplaceReg = Y;
  4305. if (ReplaceReg) {
  4306. MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
  4307. return true;
  4308. }
  4309. }
  4310. // x - (y + z) -> 0 - y (if x == z)
  4311. // x - (y + z) -> 0 - z (if x == y)
  4312. if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
  4313. Register ReplaceReg;
  4314. int64_t CstX;
  4315. if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
  4316. mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
  4317. ReplaceReg = Y;
  4318. else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
  4319. mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
  4320. ReplaceReg = Z;
  4321. if (ReplaceReg) {
  4322. MatchInfo = [=](MachineIRBuilder &B) {
  4323. auto Zero = B.buildConstant(MRI.getType(Dst), 0);
  4324. B.buildSub(Dst, Zero, ReplaceReg);
  4325. };
  4326. return true;
  4327. }
  4328. }
  4329. return false;
  4330. }
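// Illustrative example of the expansion below, using constants from the
// standard magic-number algorithm for a 32-bit divide by 10:
//   %q = G_UDIV %x, 10
// becomes, with magic = 0xCCCCCCCD, pre-shift 0, post-shift 3:
//   %m = G_UMULH %x, 0xCCCCCCCD
//   %q = G_LSHR %m, 3
// plus a final select that returns the numerator unchanged when the divisor
// is 1.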
  4331. MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
  4332. assert(MI.getOpcode() == TargetOpcode::G_UDIV);
  4333. auto &UDiv = cast<GenericMachineInstr>(MI);
  4334. Register Dst = UDiv.getReg(0);
  4335. Register LHS = UDiv.getReg(1);
  4336. Register RHS = UDiv.getReg(2);
  4337. LLT Ty = MRI.getType(Dst);
  4338. LLT ScalarTy = Ty.getScalarType();
  4339. const unsigned EltBits = ScalarTy.getScalarSizeInBits();
  4340. LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  4341. LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
  4342. auto &MIB = Builder;
  4343. MIB.setInstrAndDebugLoc(MI);
  4344. bool UseNPQ = false;
  4345. SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
  4346. auto BuildUDIVPattern = [&](const Constant *C) {
  4347. auto *CI = cast<ConstantInt>(C);
  4348. const APInt &Divisor = CI->getValue();
  4349. bool SelNPQ = false;
  4350. APInt Magic(Divisor.getBitWidth(), 0);
  4351. unsigned PreShift = 0, PostShift = 0;
  4352. // Magic algorithm doesn't work for division by 1. We need to emit a select
  4353. // at the end.
  4354. // TODO: Use undef values for divisor of 1.
  4355. if (!Divisor.isOneValue()) {
  4356. UnsignedDivisionByConstantInfo magics =
  4357. UnsignedDivisionByConstantInfo::get(Divisor);
  4358. Magic = std::move(magics.Magic);
  4359. assert(magics.PreShift < Divisor.getBitWidth() &&
  4360. "We shouldn't generate an undefined shift!");
  4361. assert(magics.PostShift < Divisor.getBitWidth() &&
  4362. "We shouldn't generate an undefined shift!");
  4363. assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
  4364. PreShift = magics.PreShift;
  4365. PostShift = magics.PostShift;
  4366. SelNPQ = magics.IsAdd;
  4367. }
  4368. PreShifts.push_back(
  4369. MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
  4370. MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
  4371. NPQFactors.push_back(
  4372. MIB.buildConstant(ScalarTy,
  4373. SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
  4374. : APInt::getZero(EltBits))
  4375. .getReg(0));
  4376. PostShifts.push_back(
  4377. MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
  4378. UseNPQ |= SelNPQ;
  4379. return true;
  4380. };
  4381. // Collect the shifts/magic values from each element.
  4382. bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
  4383. (void)Matched;
  4384. assert(Matched && "Expected unary predicate match to succeed");
  4385. Register PreShift, PostShift, MagicFactor, NPQFactor;
  4386. auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
  4387. if (RHSDef) {
  4388. PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
  4389. MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
  4390. NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
  4391. PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
  4392. } else {
  4393. assert(MRI.getType(RHS).isScalar() &&
  4394. "Non-build_vector operation should have been a scalar");
  4395. PreShift = PreShifts[0];
  4396. MagicFactor = MagicFactors[0];
  4397. PostShift = PostShifts[0];
  4398. }
  4399. Register Q = LHS;
  4400. Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
  4401. // Multiply the numerator (operand 0) by the magic value.
  4402. Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
  4403. if (UseNPQ) {
  4404. Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
  4405. // For vectors we might have a mix of non-NPQ/NPQ paths, so use
  4406. // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
  4407. if (Ty.isVector())
  4408. NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
  4409. else
  4410. NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
  4411. Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
  4412. }
  4413. Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
  4414. auto One = MIB.buildConstant(Ty, 1);
  4415. auto IsOne = MIB.buildICmp(
  4416. CmpInst::Predicate::ICMP_EQ,
  4417. Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
  4418. return MIB.buildSelect(Ty, IsOne, LHS, Q);
  4419. }
  4420. bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
  4421. assert(MI.getOpcode() == TargetOpcode::G_UDIV);
  4422. Register Dst = MI.getOperand(0).getReg();
  4423. Register RHS = MI.getOperand(2).getReg();
  4424. LLT DstTy = MRI.getType(Dst);
  4425. auto *RHSDef = MRI.getVRegDef(RHS);
  4426. if (!isConstantOrConstantVector(*RHSDef, MRI))
  4427. return false;
  4428. auto &MF = *MI.getMF();
  4429. AttributeList Attr = MF.getFunction().getAttributes();
  4430. const auto &TLI = getTargetLowering();
  4431. LLVMContext &Ctx = MF.getFunction().getContext();
  4432. auto &DL = MF.getDataLayout();
  4433. if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
  4434. return false;
  4435. // Don't do this for minsize because the instruction sequence is usually
  4436. // larger.
  4437. if (MF.getFunction().hasMinSize())
  4438. return false;
  4439. // Don't do this if the types are not going to be legal.
  4440. if (LI) {
  4441. if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
  4442. return false;
  4443. if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
  4444. return false;
  4445. if (!isLegalOrBeforeLegalizer(
  4446. {TargetOpcode::G_ICMP,
  4447. {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
  4448. DstTy}}))
  4449. return false;
  4450. }
  4451. auto CheckEltValue = [&](const Constant *C) {
  4452. if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
  4453. return !CI->isZero();
  4454. return false;
  4455. };
  4456. return matchUnaryPredicate(MRI, RHS, CheckEltValue);
  4457. }
  4458. void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
  4459. auto *NewMI = buildUDivUsingMul(MI);
  4460. replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
  4461. }
  4462. bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
  4463. assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
  4464. Register Dst = MI.getOperand(0).getReg();
  4465. Register RHS = MI.getOperand(2).getReg();
  4466. LLT DstTy = MRI.getType(Dst);
  4467. auto &MF = *MI.getMF();
  4468. AttributeList Attr = MF.getFunction().getAttributes();
  4469. const auto &TLI = getTargetLowering();
  4470. LLVMContext &Ctx = MF.getFunction().getContext();
  4471. auto &DL = MF.getDataLayout();
  4472. if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
  4473. return false;
  4474. // Don't do this for minsize because the instruction sequence is usually
  4475. // larger.
  4476. if (MF.getFunction().hasMinSize())
  4477. return false;
  4478. // If the sdiv has an 'exact' flag we can use a simpler lowering.
  4479. if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
  4480. return matchUnaryPredicate(
  4481. MRI, RHS, [](const Constant *C) { return C && !C->isZeroValue(); });
  4482. }
  4483. // Don't support the general case for now.
  4484. return false;
  4485. }
  4486. void CombinerHelper::applySDivByConst(MachineInstr &MI) {
  4487. auto *NewMI = buildSDivUsingMul(MI);
  4488. replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
  4489. }
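// Illustrative example of the exact-sdiv expansion below, for a 32-bit
// "sdiv exact %x, 6":
//   %t = G_ASHR exact %x, 1      ; strip the known-zero low bit (divide by 2)
//   %q = G_MUL %t, 0xAAAAAAAB    ; multiply by the inverse of 3 mod 2^32
// The multiplicative inverse exists because the remaining divisor is odd.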
  4490. MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
  4491. assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
  4492. auto &SDiv = cast<GenericMachineInstr>(MI);
  4493. Register Dst = SDiv.getReg(0);
  4494. Register LHS = SDiv.getReg(1);
  4495. Register RHS = SDiv.getReg(2);
  4496. LLT Ty = MRI.getType(Dst);
  4497. LLT ScalarTy = Ty.getScalarType();
  4498. LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  4499. LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
  4500. auto &MIB = Builder;
  4501. MIB.setInstrAndDebugLoc(MI);
  4502. bool UseSRA = false;
  4503. SmallVector<Register, 16> Shifts, Factors;
  4504. auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
  4505. bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
  4506. auto BuildSDIVPattern = [&](const Constant *C) {
  4507. // Don't recompute inverses for each splat element.
  4508. if (IsSplat && !Factors.empty()) {
  4509. Shifts.push_back(Shifts[0]);
  4510. Factors.push_back(Factors[0]);
  4511. return true;
  4512. }
  4513. auto *CI = cast<ConstantInt>(C);
  4514. APInt Divisor = CI->getValue();
  4515. unsigned Shift = Divisor.countTrailingZeros();
  4516. if (Shift) {
  4517. Divisor.ashrInPlace(Shift);
  4518. UseSRA = true;
  4519. }
  4520. // Calculate the multiplicative inverse modulo BW.
  4521. // 2^W requires W + 1 bits, so we have to extend and then truncate.
  4522. unsigned W = Divisor.getBitWidth();
  4523. APInt Factor = Divisor.zext(W + 1)
  4524. .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
  4525. .trunc(W);
  4526. Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
  4527. Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
  4528. return true;
  4529. };
  4530. // Collect all magic values from the build vector.
  4531. bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
  4532. (void)Matched;
  4533. assert(Matched && "Expected unary predicate match to succeed");
  4534. Register Shift, Factor;
  4535. if (Ty.isVector()) {
  4536. Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
  4537. Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
  4538. } else {
  4539. Shift = Shifts[0];
  4540. Factor = Factors[0];
  4541. }
  4542. Register Res = LHS;
  4543. if (UseSRA)
  4544. Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
  4545. return MIB.buildMul(Ty, Res, Factor);
  4546. }
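// G_UMULH by a power of two 2^k returns the high half of the widening
// multiply, which is simply the operand shifted right by (bit width - k):
// e.g. on s32, (G_UMULH %x, 8) is equivalent to (G_LSHR %x, 29).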
  4547. bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
  4548. assert(MI.getOpcode() == TargetOpcode::G_UMULH);
  4549. Register RHS = MI.getOperand(2).getReg();
  4550. Register Dst = MI.getOperand(0).getReg();
  4551. LLT Ty = MRI.getType(Dst);
  4552. LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  4553. auto MatchPow2ExceptOne = [&](const Constant *C) {
  4554. if (auto *CI = dyn_cast<ConstantInt>(C))
  4555. return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
  4556. return false;
  4557. };
  4558. if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
  4559. return false;
  4560. return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
  4561. }
  4562. void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
  4563. Register LHS = MI.getOperand(1).getReg();
  4564. Register RHS = MI.getOperand(2).getReg();
  4565. Register Dst = MI.getOperand(0).getReg();
  4566. LLT Ty = MRI.getType(Dst);
  4567. LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  4568. unsigned NumEltBits = Ty.getScalarSizeInBits();
  4569. Builder.setInstrAndDebugLoc(MI);
  4570. auto LogBase2 = buildLogBase2(RHS, Builder);
  4571. auto ShiftAmt =
  4572. Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
  4573. auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
  4574. Builder.buildLShr(Dst, LHS, Trunc);
  4575. MI.eraseFromParent();
  4576. }
  4577. bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
  4578. BuildFnTy &MatchInfo) {
  4579. unsigned Opc = MI.getOpcode();
  4580. assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
  4581. Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
  4582. Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
  4583. Register Dst = MI.getOperand(0).getReg();
  4584. Register X = MI.getOperand(1).getReg();
  4585. Register Y = MI.getOperand(2).getReg();
  4586. LLT Type = MRI.getType(Dst);
  4587. // fold (fadd x, fneg(y)) -> (fsub x, y)
  4588. // fold (fadd fneg(y), x) -> (fsub x, y)
4589. // G_FADD is commutative so both cases are checked by m_GFAdd
  4590. if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
  4591. isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
  4592. Opc = TargetOpcode::G_FSUB;
  4593. }
  4594. /// fold (fsub x, fneg(y)) -> (fadd x, y)
  4595. else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
  4596. isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
  4597. Opc = TargetOpcode::G_FADD;
  4598. }
  4599. // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
  4600. // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
  4601. // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
  4602. // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
  4603. else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
  4604. Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
  4605. mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
  4606. mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
  4607. // no opcode change
  4608. } else
  4609. return false;
  4610. MatchInfo = [=, &MI](MachineIRBuilder &B) {
  4611. Observer.changingInstr(MI);
  4612. MI.setDesc(B.getTII().get(Opc));
  4613. MI.getOperand(1).setReg(X);
  4614. MI.getOperand(2).setReg(Y);
  4615. Observer.changedInstr(MI);
  4616. };
  4617. return true;
  4618. }
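
// Editorial note (not part of the original source): the MatchInfo lambda above
// rewrites the instruction in place (setDesc plus operand updates) rather than
// building a replacement, e.g. G_FADD x, (G_FNEG y) simply becomes
// G_FSUB x, y on the same MachineInstr.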

bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  Register LHS = MI.getOperand(1).getReg();
  MatchInfo = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());

  const auto LHSCst = Ty.isVector()
                          ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
                          : getFConstantVRegValWithLookThrough(LHS, MRI);
  if (!LHSCst)
    return false;

  // -0.0 is always allowed.
  if (LHSCst->Value.isNegZero())
    return true;

  // +0.0 is only allowed if nsz is set.
  if (LHSCst->Value.isPosZero())
    return MI.getFlag(MachineInstr::FmNsz);

  return false;
}

void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  Register Dst = MI.getOperand(0).getReg();
  Builder.buildFNeg(
      Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
  eraseInst(MI);
}
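
// Editorial note (not part of the original source): with a -0.0 LHS (or +0.0
// under nsz), G_FSUB 0.0, x is rewritten as G_FNEG of G_FCANONICALIZE(x); the
// extra canonicalize appears to be there because a plain sign-bit flip, unlike
// the original subtraction, would not canonicalize the input value.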

/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
/// due to global flags or MachineInstr flags.
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
  if (MI.getOpcode() != TargetOpcode::G_FMUL)
    return false;
  return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
}

static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
                        const MachineRegisterInfo &MRI) {
  return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
                       MRI.use_instr_nodbg_end()) >
         std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
                       MRI.use_instr_nodbg_end());
}

bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
                                         bool &AllowFusionGlobally,
                                         bool &HasFMAD, bool &Aggressive,
                                         bool CanReassociate) {
  auto *MF = MI.getMF();
  const auto &TLI = *MF->getSubtarget().getTargetLowering();
  const TargetOptions &Options = MF->getTarget().Options;
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());

  if (CanReassociate &&
      !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
    return false;

  // Floating-point multiply-add with intermediate rounding.
  HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
                isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return false;

  AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
                        Options.UnsafeFPMath || HasFMAD;
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
    return false;

  Aggressive = TLI.enableAggressiveFMAFusion(DstType);
  return true;
}
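
// Editorial note (not part of the original source): the callers below use the
// two flags computed here to pick PreferredFusedOpcode: G_FMAD when the target
// reports a legal fmad (fused multiply-add with intermediate rounding), and
// G_FMA otherwise.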

bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FADD);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
      std::swap(LHS, RHS);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {LHS.MI->getOperand(1).getReg(),
                    LHS.MI->getOperand(2).getReg(), RHS.Reg});
    };
    return true;
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {RHS.MI->getOperand(1).getReg(),
                    RHS.MI->getOperand(2).getReg(), LHS.Reg});
    };
    return true;
  }

  return false;
}
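
// Editorial note (illustrative example, not part of the original source):
// with contraction allowed,
//   %m = G_FMUL %a, %b
//   %d = G_FADD %m, %c
// becomes a single %d = G_FMA %a, %b, %c (or G_FMAD), provided %m has a single
// non-debug use or the target enables aggressive FMA fusion.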

bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FADD);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
      std::swap(LHS, RHS);
  }

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  MachineInstr *FpExtSrc;
  if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
      isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
      auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
    };
    return true;
  }

  // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
  // Note: Commutes FADD operands.
  if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
      isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
      auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
    };
    return true;
  }

  return false;
}
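
// Editorial note (illustrative example, not part of the original source):
// an s16 multiply that is extended before the add,
//   %m:_(s16) = G_FMUL %a, %b
//   %e:_(s32) = G_FPEXT %m
//   %d:_(s32) = G_FADD %e, %c
// folds to G_FMA (G_FPEXT %a), (G_FPEXT %b), %c when the target reports the
// fpext as foldable into the fused opcode.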

bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FADD);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
    return false;

  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
      std::swap(LHS, RHS);
  }

  MachineInstr *FMA = nullptr;
  Register Z;
  // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
  if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
      (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
       TargetOpcode::G_FMUL) &&
      MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
      MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
    FMA = LHS.MI;
    Z = RHS.Reg;
  }
  // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
  else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
           (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
            TargetOpcode::G_FMUL) &&
           MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
           MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
    Z = LHS.Reg;
    FMA = RHS.MI;
  }

  if (FMA) {
    MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
    Register X = FMA->getOperand(1).getReg();
    Register Y = FMA->getOperand(2).getReg();
    Register U = FMulMI->getOperand(1).getReg();
    Register V = FMulMI->getOperand(2).getReg();

    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
      B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {X, Y, InnerFMA});
    };
    return true;
  }

  return false;
}
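
// Editorial note (not part of the original source): this variant requires
// reassociation (canCombineFMadOrFMA is called with CanReassociate = true)
// because nesting the additions, e.g.
//   fadd (fma x, y, (fmul u, v)), z  ->  fma x, y, (fma u, v, z)
// changes the order in which the two additions are evaluated.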

bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FADD);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  if (!Aggressive)
    return false;

  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
    if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
      std::swap(LHS, RHS);
  }

  // Builds: (fma x, y, (fma (fpext u), (fpext v), z)).
  auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
                                 Register Y, MachineIRBuilder &B) {
    Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
    Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
    Register InnerFMA =
        B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
            .getReg(0);
    B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                 {X, Y, InnerFMA});
  };

  MachineInstr *FMulMI, *FMAMI;
  // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
  //   -> (fma x, y, (fma (fpext u), (fpext v), z))
  if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
      mi_match(LHS.MI->getOperand(3).getReg(), MRI,
               m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=](MachineIRBuilder &B) {
      buildMatchInfo(FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), RHS.Reg,
                     LHS.MI->getOperand(1).getReg(),
                     LHS.MI->getOperand(2).getReg(), B);
    };
    return true;
  }

  // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
  //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
  // FIXME: This turns two single-precision and one double-precision
  // operation into two double-precision operations, which might not be
  // interesting for all targets, especially GPUs.
  if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
      FMAMI->getOpcode() == PreferredFusedOpcode) {
    MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
    if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
        TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                            MRI.getType(FMAMI->getOperand(0).getReg()))) {
      MatchInfo = [=](MachineIRBuilder &B) {
        Register X = FMAMI->getOperand(1).getReg();
        Register Y = FMAMI->getOperand(2).getReg();
        X = B.buildFPExt(DstType, X).getReg(0);
        Y = B.buildFPExt(DstType, Y).getReg(0);
        buildMatchInfo(FMulMI->getOperand(1).getReg(),
                       FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
      };
      return true;
    }
  }

  // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
  //   -> (fma x, y, (fma (fpext u), (fpext v), z))
  if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
      mi_match(RHS.MI->getOperand(3).getReg(), MRI,
               m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=](MachineIRBuilder &B) {
      buildMatchInfo(FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), LHS.Reg,
                     RHS.MI->getOperand(1).getReg(),
                     RHS.MI->getOperand(2).getReg(), B);
    };
    return true;
  }

  // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
  //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
  // FIXME: This turns two single-precision and one double-precision
  // operation into two double-precision operations, which might not be
  // interesting for all targets, especially GPUs.
  if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
      FMAMI->getOpcode() == PreferredFusedOpcode) {
    MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
    if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
        TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
                            MRI.getType(FMAMI->getOperand(0).getReg()))) {
      MatchInfo = [=](MachineIRBuilder &B) {
        Register X = FMAMI->getOperand(1).getReg();
        Register Y = FMAMI->getOperand(2).getReg();
        X = B.buildFPExt(DstType, X).getReg(0);
        Y = B.buildFPExt(DstType, Y).getReg(0);
        buildMatchInfo(FMulMI->getOperand(1).getReg(),
                       FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
      };
      return true;
    }
  }

  return false;
}

bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  bool FirstMulHasFewerUses = true;
  if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
      hasMoreUses(*LHS.MI, *RHS.MI, MRI))
    FirstMulHasFewerUses = false;

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
  if (FirstMulHasFewerUses &&
      (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
       (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {LHS.MI->getOperand(1).getReg(),
                    LHS.MI->getOperand(2).getReg(), NegZ});
    };
    return true;
  }
  // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
  else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
            (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegY =
          B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
    };
    return true;
  }

  return false;
}
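
// Editorial note (illustrative example, not part of the original source):
//   %m = G_FMUL %x, %y
//   %d = G_FSUB %m, %z
// becomes G_FMA %x, %y, (G_FNEG %z), while the mirrored form
// G_FSUB %z, %m negates one multiplicand instead: G_FMA (G_FNEG %x), %y, %z.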

bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  MachineInstr *FMulMI;
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
      (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
                      MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally)) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegX =
          B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegX, FMulMI->getOperand(2).getReg(), NegZ});
    };
    return true;
  }

  // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
  if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
      (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
                      MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally)) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FMulMI->getOperand(1).getReg(),
                    FMulMI->getOperand(2).getReg(), LHSReg});
    };
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  MachineInstr *FMulMI;
  // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
  if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FpExtX =
          B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register FpExtY =
          B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
      Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FpExtX, FpExtY, NegZ});
    };
    return true;
  }

  // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
  if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FpExtY =
          B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
      Register FpExtZ =
          B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegY, FpExtZ, LHSReg});
    };
    return true;
  }

  return false;
}

bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();
  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
                            MachineIRBuilder &B) {
    Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
    Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
    B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
  };

  MachineInstr *FMulMI;
  // fold (fsub (fpext (fneg (fmul x, y))), z) ->
  //      (fneg (fma (fpext x), (fpext y), z))
  // fold (fsub (fneg (fpext (fmul x, y))), z) ->
  //      (fneg (fma (fpext x), (fpext y), z))
  if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
       mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
      buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), RHSReg, B);
      B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
    };
    return true;
  }

  // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
  // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
  if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
       mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), LHSReg, B);
    };
    return true;
  }

  return false;
}

bool CombinerHelper::matchSelectToLogical(MachineInstr &MI,
                                          BuildFnTy &MatchInfo) {
  GSelect &Sel = cast<GSelect>(MI);
  Register DstReg = Sel.getReg(0);
  Register Cond = Sel.getCondReg();
  Register TrueReg = Sel.getTrueReg();
  Register FalseReg = Sel.getFalseReg();

  auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI);
  auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI);

  const LLT CondTy = MRI.getType(Cond);
  const LLT OpTy = MRI.getType(TrueReg);
  if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1)
    return false;

  // We have a boolean select.

  // select Cond, Cond, F --> or Cond, F
  // select Cond, 1, F --> or Cond, F
  auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI);
  if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) {
    MatchInfo = [=](MachineIRBuilder &MIB) {
      MIB.buildOr(DstReg, Cond, FalseReg);
    };
    return true;
  }

  // select Cond, T, Cond --> and Cond, T
  // select Cond, T, 0 --> and Cond, T
  auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI);
  if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) {
    MatchInfo = [=](MachineIRBuilder &MIB) {
      MIB.buildAnd(DstReg, Cond, TrueReg);
    };
    return true;
  }

  // select Cond, T, 1 --> or (not Cond), T
  if (MaybeCstFalse && MaybeCstFalse->isOne()) {
    MatchInfo = [=](MachineIRBuilder &MIB) {
      MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg);
    };
    return true;
  }

  // select Cond, 0, F --> and (not Cond), F
  if (MaybeCstTrue && MaybeCstTrue->isZero()) {
    MatchInfo = [=](MachineIRBuilder &MIB) {
      MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg);
    };
    return true;
  }

  return false;
}
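
// Editorial note (not part of the original source): all four rewrites rely on
// every operand being a boolean (scalar or vector of s1). For example,
// select %c, %c, %f yields 1 when %c is 1 and %f when %c is 0, which is
// exactly %c | %f, so the G_OR form is equivalent bit for bit.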

bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
                                            unsigned &IdxToPropagate) {
  bool PropagateNaN;
  switch (MI.getOpcode()) {
  default:
    return false;
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
    PropagateNaN = false;
    break;
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
    PropagateNaN = true;
    break;
  }

  auto MatchNaN = [&](unsigned Idx) {
    Register MaybeNaNReg = MI.getOperand(Idx).getReg();
    const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
    if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
      return false;
    IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
    return true;
  };

  return MatchNaN(1) || MatchNaN(2);
}
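
// Editorial note (illustrative example, not part of the original source):
// with a constant NaN operand, G_FMINNUM %x, NaN folds to %x (the *NUM
// variants return the non-NaN operand), whereas G_FMINIMUM %x, NaN folds to
// the NaN operand itself, matching IEEE-754 minimum/maximum NaN propagation.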

bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
  assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();

  // Helper lambda to check for opportunities for
  //   A + (B - A) -> B
  //   (B - A) + A -> B
  auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
    Register Reg;
    return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
           Reg == MaybeSameReg;
  };
  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}
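
// Editorial note (illustrative example, not part of the original source):
//   %d = G_SUB %b, %a
//   %r = G_ADD %a, %d
// matches with Src = %b, so users of %r can be rewritten to use %b directly.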

bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
                                                  Register &MatchInfo) {
  // This combine folds the following patterns:
  //
  //  G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
  //  G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
  //    into
  //      x
  //    if
  //      k == size in bits of the destination vector's element type
  //      type(x) == type(dst)
  //
  //  G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
  //    into
  //      x
  //    if
  //      type(x) == type(dst)
  LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
  LLT DstEltTy = DstVecTy.getElementType();

  Register Lo, Hi;
  if (mi_match(
          MI, MRI,
          m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
    MatchInfo = Lo;
    return MRI.getType(MatchInfo) == DstVecTy;
  }

  std::optional<ValueAndVReg> ShiftAmount;
  const auto LoPattern = m_GBitcast(m_Reg(Lo));
  const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
  if (mi_match(
          MI, MRI,
          m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
                   m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
    if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
      MatchInfo = Lo;
      return MRI.getType(MatchInfo) == DstVecTy;
    }
  }

  return false;
}
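
// Editorial note (illustrative example, not part of the original source):
// for a <2 x s32> destination,
//   %cast:_(s64) = G_BITCAST %x:_(<2 x s32>)
//   %hi:_(s64) = G_LSHR %cast, 32
//   %dst:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %cast, %hi
// simply reassembles %x, so %dst can be replaced by %x.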

bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
                                               Register &MatchInfo) {
  // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y))) with just x
  // if type(x) == type(G_TRUNC).
  if (!mi_match(MI.getOperand(1).getReg(), MRI,
                m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
    return false;

  return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
}

bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
                                                   Register &MatchInfo) {
  // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
  // y if K == size of vector element type.
  std::optional<ValueAndVReg> ShiftAmt;
  if (!mi_match(MI.getOperand(1).getReg(), MRI,
                m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
                        m_GCst(ShiftAmt))))
    return false;

  LLT MatchTy = MRI.getType(MatchInfo);
  return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
         MatchTy == MRI.getType(MI.getOperand(0).getReg());
}

unsigned CombinerHelper::getFPMinMaxOpcForSelect(
    CmpInst::Predicate Pred, LLT DstTy,
    SelectPatternNaNBehaviour VsNaNRetVal) const {
  assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
         "Expected a NaN behaviour?");
  // Choose an opcode based on legality or the behaviour when one of the
  // LHS/RHS may be NaN.
  switch (Pred) {
  default:
    return 0;
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE:
    if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
      return TargetOpcode::G_FMAXNUM;
    if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
      return TargetOpcode::G_FMAXIMUM;
    if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
      return TargetOpcode::G_FMAXNUM;
    if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
      return TargetOpcode::G_FMAXIMUM;
    return 0;
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_OLE:
    if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
      return TargetOpcode::G_FMINNUM;
    if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
      return TargetOpcode::G_FMINIMUM;
    if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
      return TargetOpcode::G_FMINNUM;
    if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
      return 0;
    return TargetOpcode::G_FMINIMUM;
  }
}

CombinerHelper::SelectPatternNaNBehaviour
CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
                                        bool IsOrderedComparison) const {
  bool LHSSafe = isKnownNeverNaN(LHS, MRI);
  bool RHSSafe = isKnownNeverNaN(RHS, MRI);
  // Completely unsafe.
  if (!LHSSafe && !RHSSafe)
    return SelectPatternNaNBehaviour::NOT_APPLICABLE;
  if (LHSSafe && RHSSafe)
    return SelectPatternNaNBehaviour::RETURNS_ANY;
  // An ordered comparison will return false when given a NaN, so it
  // returns the RHS.
  if (IsOrderedComparison)
    return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
                   : SelectPatternNaNBehaviour::RETURNS_OTHER;
  // An unordered comparison will return true when given a NaN, so it
  // returns the LHS.
  return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
                 : SelectPatternNaNBehaviour::RETURNS_NAN;
}

bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
                                           Register TrueVal, Register FalseVal,
                                           BuildFnTy &MatchInfo) {
  // Match: select (fcmp cond x, y) x, y
  //        select (fcmp cond x, y) y, x
  // And turn it into fminnum/fmaxnum or fminimum/fmaximum based on the
  // condition.
  LLT DstTy = MRI.getType(Dst);
  // Bail out early on pointers, since we'll never want to fold to a min/max.
  if (DstTy.isPointer())
    return false;
  // Match a floating point compare with a less-than/greater-than predicate.
  // TODO: Allow multiple users of the compare if they are all selects.
  CmpInst::Predicate Pred;
  Register CmpLHS, CmpRHS;
  if (!mi_match(Cond, MRI,
                m_OneNonDBGUse(
                    m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
      CmpInst::isEquality(Pred))
    return false;
  SelectPatternNaNBehaviour ResWithKnownNaNInfo =
      computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
  if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
    return false;
  if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
    std::swap(CmpLHS, CmpRHS);
    Pred = CmpInst::getSwappedPredicate(Pred);
    if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
      ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
    else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
      ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
  }
  if (TrueVal != CmpLHS || FalseVal != CmpRHS)
    return false;
  // Decide what type of max/min this should be based on the predicate.
  unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
  if (!Opc || !isLegal({Opc, {DstTy}}))
    return false;
  // Comparisons between signed zero and zero may have different results...
  // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
  if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
    // We don't know if a comparison between two 0s will give us a consistent
    // result. Be conservative and only proceed if at least one side is
    // non-zero.
    auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
    if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
      KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
      if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
        return false;
    }
  }
  MatchInfo = [=](MachineIRBuilder &B) {
    B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
  };
  return true;
}
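
// Editorial note (illustrative example, not part of the original source):
//   %c = G_FCMP floatpred(olt), %x, %y
//   %d = G_SELECT %c, %x, %y
// becomes G_FMINNUM %x, %y when neither operand can be NaN (or when the
// NaN-propagation analysis above shows the select already behaves like the
// chosen min/max opcode).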

bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
                                                 BuildFnTy &MatchInfo) {
  // TODO: Handle integer cases.
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
  // Condition may be fed by a truncated compare.
  Register Cond = MI.getOperand(1).getReg();
  Register MaybeTrunc;
  if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
    Cond = MaybeTrunc;
  Register Dst = MI.getOperand(0).getReg();
  Register TrueVal = MI.getOperand(2).getReg();
  Register FalseVal = MI.getOperand(3).getReg();
  return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
}

bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
                                                   BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  // (X + Y) == X --> Y == 0
  // (X + Y) != X --> Y != 0
  // (X - Y) == X --> Y == 0
  // (X - Y) != X --> Y != 0
  // (X ^ Y) == X --> Y == 0
  // (X ^ Y) != X --> Y != 0
  Register Dst = MI.getOperand(0).getReg();
  CmpInst::Predicate Pred;
  Register X, Y, OpLHS, OpRHS;
  bool MatchedSub = mi_match(
      Dst, MRI,
      m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
  if (MatchedSub && X != OpLHS)
    return false;

  if (!MatchedSub) {
    if (!mi_match(Dst, MRI,
                  m_c_GICmp(m_Pred(Pred), m_Reg(X),
                            m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
                                     m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
      return false;
    Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
  }

  MatchInfo = [=](MachineIRBuilder &B) {
    auto Zero = B.buildConstant(MRI.getType(Y), 0);
    B.buildICmp(Pred, Dst, Y, Zero);
  };
  return CmpInst::isEquality(Pred) && Y.isValid();
}
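
// Editorial note (not part of the original source): these folds are sound
// because G_ADD, G_SUB and G_XOR are cancellative in the compared operand,
// e.g. (x + y) == x holds exactly when y == 0 in modular arithmetic, so the
// equality can be tested against zero directly.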

bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;
  if (tryCombineIndexedLoadStore(MI))
    return true;
  return false;
}