PPCISelDAGToDAG.cpp

//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pattern matching instruction selector for PowerPC,
// converting from a legalized dag to a PPC dag.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "ppc-isel"
#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"

STATISTIC(NumSextSetcc,
          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(NumZextSetcc,
          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(SignExtensionsAdded,
          "Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
          "Number of zero extensions for compare inputs added.");
STATISTIC(NumLogicOpsOnComparison,
          "Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
          "Number of compares not eliminated as they have non-extending uses.");
STATISTIC(NumP9Setb,
          "Number of compares lowered to setb.");

// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
                          cl::desc("expose the ANDI glue bug on PPC"),
                          cl::Hidden);

static cl::opt<bool>
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
                       cl::desc("use aggressive ppc isel for bit permutations"),
                       cl::Hidden);

static cl::opt<bool> BPermRewriterNoMasking(
    "ppc-bit-perm-rewriter-stress-rotates",
    cl::desc("stress rotate selection in aggressive ppc isel for "
             "bit permutations"),
    cl::Hidden);

static cl::opt<bool> EnableBranchHint(
    "ppc-use-branch-hint", cl::init(true),
    cl::desc("Enable static hinting of branches on ppc"),
    cl::Hidden);

static cl::opt<bool> EnableTLSOpt(
    "ppc-tls-opt", cl::init(true),
    cl::desc("Enable tls optimization peephole"),
    cl::Hidden);

enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
                     ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
                     ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };

static cl::opt<ICmpInGPRType> CmpInGPR(
    "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
    cl::desc("Specify the types of comparisons to emit GPR-only code for."),
    cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
               clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
               clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
               clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
               clEnumValN(ICGPR_NonExtIn, "nonextin",
                          "Only comparisons where inputs don't need [sz]ext."),
               clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
               clEnumValN(ICGPR_ZextI32, "zexti32",
                          "Only i32 comparisons with zext result."),
               clEnumValN(ICGPR_ZextI64, "zexti64",
                          "Only i64 comparisons with zext result."),
               clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
               clEnumValN(ICGPR_SextI32, "sexti32",
                          "Only i32 comparisons with sext result."),
               clEnumValN(ICGPR_SextI64, "sexti64",
                          "Only i64 comparisons with sext result.")));

namespace {

//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
///
class PPCDAGToDAGISel : public SelectionDAGISel {
  const PPCTargetMachine &TM;
  const PPCSubtarget *Subtarget = nullptr;
  const PPCTargetLowering *PPCLowering = nullptr;
  unsigned GlobalBaseReg = 0;

public:
  static char ID;

  PPCDAGToDAGISel() = delete;

  explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Make sure we re-emit a set of the global base reg if necessary
    GlobalBaseReg = 0;
    Subtarget = &MF.getSubtarget<PPCSubtarget>();
    PPCLowering = Subtarget->getTargetLowering();
    if (Subtarget->hasROPProtect()) {
      // Create a place on the stack for the ROP Protection Hash.
      // The ROP Protection Hash will always be 8 bytes and aligned to 8
      // bytes.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      const int Result = MFI.CreateStackObject(8, Align(8), false);
      FI->setROPProtectionHashSaveIndex(Result);
    }
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

  /// getI16Imm - Return a target constant with the specified value, of type
  /// i16.
  inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
  }

  /// getI32Imm - Return a target constant with the specified value, of type
  /// i32.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  /// getI64Imm - Return a target constant with the specified value, of type
  /// i64.
  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  }

  /// getSmallIPtrImm - Return a target constant of pointer type.
  inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(
        Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
  }

  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
  /// rotate and mask opcode and mask operation.
  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                              unsigned &SH, unsigned &MB, unsigned &ME);

  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
  /// base register. Return the virtual register that holds this value.
  SDNode *getGlobalBaseReg();

  void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0);

  // Select - Convert the specified operand from a target-independent to a
  // target-specific node if it hasn't already been changed.
  void Select(SDNode *N) override;

  bool tryBitfieldInsert(SDNode *N);
  bool tryBitPermutation(SDNode *N);
  bool tryIntCompareInGPR(SDNode *N);

  // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
  // an X-Form load instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormLoad(LoadSDNode *N);
  // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
  // an X-Form store instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormStore(StoreSDNode *N);

  /// SelectCC - Select a comparison of the specified values with the
  /// specified condition code, returning the CR# of the expression.
  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                   const SDLoc &dl, SDValue Chain = SDValue());

  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
  /// immediate field. Note that the operand at this point is already the
  /// result of a prior SelectAddressRegImm call.
  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
    if (N.getOpcode() == ISD::TargetConstant ||
        N.getOpcode() == ISD::TargetGlobalAddress) {
      Out = N;
      return true;
    }
    return false;
  }

  /// SelectDSForm - Returns true if address N can be represented by the
  /// addressing mode of DSForm instructions (a base register, plus a signed
  /// 16-bit displacement that is a multiple of 4).
  bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              Align(4)) == PPC::AM_DSForm;
  }

  /// SelectDQForm - Returns true if address N can be represented by the
  /// addressing mode of DQForm instructions (a base register, plus a signed
  /// 16-bit displacement that is a multiple of 16).
  bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              Align(16)) == PPC::AM_DQForm;
  }

  /// SelectDForm - Returns true if address N can be represented by
  /// the addressing mode of DForm instructions (a base register, plus a
  /// signed 16-bit immediate).
  bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              std::nullopt) == PPC::AM_DForm;
  }

  /// SelectPCRelForm - Returns true if address N can be represented by
  /// PC-Relative addressing mode.
  bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
                       SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              std::nullopt) == PPC::AM_PCRel;
  }

  /// SelectPDForm - Returns true if address N can be represented by Prefixed
  /// DForm addressing mode (a base register, plus a signed 34-bit immediate).
  bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              std::nullopt) ==
           PPC::AM_PrefixDForm;
  }

  /// SelectXForm - Returns true if address N can be represented by the
  /// addressing mode of XForm instructions (an indexed [r+r] operation).
  bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              std::nullopt) == PPC::AM_XForm;
  }

  /// SelectForceXForm - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation (an XForm instruction).
  bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
                        SDValue &Base) {
    return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
           PPC::AM_XForm;
  }

  /// SelectAddrIdx - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is D.
  /// The last parameter \p 0 means the associated D form has no requirement
  /// for a 16-bit signed displacement.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            std::nullopt);
  }

  /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DS.
  /// The last parameter \p 4 means the associated DS form 16-bit signed
  /// displacement must be a multiple of 4.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(4));
  }

  /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DQ.
  /// The last parameter \p 16 means the associated DQ form 16-bit signed
  /// displacement must be a multiple of 16.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrIdxOnly - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation.
  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
  }

  /// SelectAddrImm - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement [r+imm].
  /// The last parameter \p 0 means the D form has no requirement for a
  /// 16-bit signed displacement.
  bool SelectAddrImm(SDValue N, SDValue &Disp,
                     SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
                                            std::nullopt);
  }

  /// SelectAddrImmX4 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 4 (last parameter). Suitable for use by STD and friends.
  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
  }

  /// SelectAddrImmX16 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 16 (last parameter). Suitable for use by STXV and friends.
  bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrImmX34 - Returns true if the address N can be represented by
  /// a base register plus a signed 34-bit displacement. Suitable for use by
  /// PSTXVP and friends.
  bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
  }

  // Select an address into a single register.
  bool SelectAddr(SDValue N, SDValue &Base) {
    Base = N;
    return true;
  }

  bool SelectAddrPCRel(SDValue N, SDValue &Base) {
    return PPCLowering->SelectAddressPCRel(N, Base);
  }

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions. It is always correct to compute the value into
  /// a register. The case of adding a (possibly relocatable) constant to a
  /// register can be improved, but it is wrong to substitute Reg+Reg for
  /// Reg in an asm, because the load or store opcode would have to change.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override {
    switch(ConstraintID) {
    default:
      errs() << "ConstraintID: " << ConstraintID << "\n";
      llvm_unreachable("Unexpected asm memory constraint");
    case InlineAsm::Constraint_es:
    case InlineAsm::Constraint_m:
    case InlineAsm::Constraint_o:
    case InlineAsm::Constraint_Q:
    case InlineAsm::Constraint_Z:
    case InlineAsm::Constraint_Zy:
      // We need to make sure that this one operand does not end up in r0
      // (because we might end up lowering this as 0(%op)).
      const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
      const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
      SDLoc dl(Op);
      SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
      SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
      OutOps.push_back(NewOp);
      return false;
    }
    return true;
  }

  // Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
  bool trySETCC(SDNode *N);
  bool tryFoldSWTestBRCC(SDNode *N);
  bool trySelectLoopCountIntrinsic(SDNode *N);
  bool tryAsSingleRLDICL(SDNode *N);
  bool tryAsSingleRLDICR(SDNode *N);
  bool tryAsSingleRLWINM(SDNode *N);
  bool tryAsSingleRLWINM8(SDNode *N);
  bool tryAsSingleRLWIMI(SDNode *N);
  bool tryAsPairOfRLDICL(SDNode *N);
  bool tryAsSingleRLDIMI(SDNode *N);

  void PeepholePPC64();
  void PeepholePPC64ZExt();
  void PeepholeCROps();

  SDValue combineToCMPB(SDNode *N);
  void foldBoolExts(SDValue &Res, SDNode *&N);

  bool AllUsersSelectZero(SDNode *N);
  void SwapAllSelectUsers(SDNode *N);

  bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
  void transferMemOperands(SDNode *N, SDNode *Result);
};

} // end anonymous namespace

char PPCDAGToDAGISel::ID = 0;

INITIALIZE_PASS(PPCDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
  if (!GlobalBaseReg) {
    const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    const Module *M = MF->getFunction().getParent();
    DebugLoc dl;

    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
      if (Subtarget->isTargetELF()) {
        GlobalBaseReg = PPC::R30;
        if (!Subtarget->isSecurePlt() &&
            M->getPICLevel() == PICLevel::SmallPIC) {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        } else {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
          BuildMI(FirstMBB, MBBI, dl,
                  TII.get(PPC::UpdateGBR), GlobalBaseReg)
              .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        }
      } else {
        GlobalBaseReg =
          RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
      }
    } else {
      // We must ensure that this sequence is dominated by the prologue.
      // FIXME: This is a bit of a big hammer since we don't get the benefits
      // of shrink-wrapping whenever we emit this instruction. Considering
      // this is used in any function where we emit a jump table, this may be
      // a significant limitation. We should consider inserting this in the
      // block where it is used and then commoning this sequence up if it
      // appears in multiple places.
      // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
      // MovePCtoLR8.
      MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
    }
  }
  return CurDAG->getRegister(GlobalBaseReg,
                             PPCLowering->getPointerTy(CurDAG->getDataLayout()))
      .getNode();
}

// Check if a SDValue has the toc-data attribute.
static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
  if (!GA)
    return false;

  const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
  if (!GV)
    return false;

  if (!GV->hasAttribute("toc-data"))
    return false;

  // TODO: These asserts should be updated as more support for the toc data
  // transformation is added (struct support, etc.).
  assert(
      PointerSize >= GV->getAlign().valueOrOne().value() &&
      "GlobalVariables with an alignment requirement stricter than TOC entry "
      "size not supported by the toc data transformation.");

  Type *GVType = GV->getValueType();
  assert(GVType->isSized() && "A GlobalVariable's size must be known to be "
                              "supported by the toc data transformation.");
  if (GVType->isVectorTy())
    report_fatal_error("A GlobalVariable of Vector type is not currently "
                       "supported by the toc data transformation.");
  if (GVType->isArrayTy())
    report_fatal_error("A GlobalVariable of Array type is not currently "
                       "supported by the toc data transformation.");
  if (GVType->isStructTy())
    report_fatal_error("A GlobalVariable of Struct type is not currently "
                       "supported by the toc data transformation.");
  assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&
         "A GlobalVariable with size larger than a TOC entry is not currently "
         "supported by the toc data transformation.");
  if (GV->hasLocalLinkage() || GV->hasPrivateLinkage())
    report_fatal_error("A GlobalVariable with private or local linkage is not "
                       "currently supported by the toc data transformation.");
  assert(!GV->hasCommonLinkage() &&
         "Tentative definitions cannot have the mapping class XMC_TD.");
  return true;
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
/// operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

/// isInt64Immediate - This method tests to see if the value is a 64-bit
/// constant operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
  return isInt64Immediate(N.getNode(), Imm);
}
static unsigned getBranchHint(unsigned PCC,
                              const FunctionLoweringInfo &FuncInfo,
                              const SDValue &DestMBB) {
  assert(isa<BasicBlockSDNode>(DestMBB));

  if (!FuncInfo.BPI) return PPC::BR_NO_HINT;

  const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
  const Instruction *BBTerm = BB->getTerminator();

  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;

  const BasicBlock *TBB = BBTerm->getSuccessor(0);
  const BasicBlock *FBB = BBTerm->getSuccessor(1);

  auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
  auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);

  // We only want to handle cases which are easy to predict statically, e.g. a
  // C++ throw statement that is very likely not taken, or a call to a function
  // that never returns, e.g. stdlib exit(). So we set a Threshold to filter
  // out the unwanted cases.
  //
  // Below is the LLVM branch weight table; we only want to handle cases 1 and 2.
  //
  //  Case                   Taken:Nontaken   Example
  //  1. Unreachable         1048575:1        C++ throw, stdlib exit()
  //  2. Invoke-terminating  1:1048575
  //  3. Coldblock           4:64             __builtin_expect
  //  4. Loop Branch         124:4            For loop
  //  5. PH/ZH/FPH           20:12
  const uint32_t Threshold = 10000;

  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
    return PPC::BR_NO_HINT;

  LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
                    << "::" << BB->getName() << "'\n"
                    << "  -> " << TBB->getName() << ": " << TProb << "\n"
                    << "  -> " << FBB->getName() << ": " << FProb << "\n");

  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);

  // If the destination basic block is the false successor (FBB), swap the
  // branch probabilities, because we want 'TProb' to stand for the probability
  // of branching to the destination basic block.
  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
    std::swap(TProb, FProb);

  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
}
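// Illustrative example of the Threshold filter above (added for clarity, not
// part of the original comments): with 'unreachable' weights of 1048575:1 the
// edge probabilities are roughly 0.999999 and 0.000001, so max/Threshold
// (~1e-4) is not smaller than min and a hint is emitted. With __builtin_expect
// weights of 4:64 the probabilities are roughly 0.06 and 0.94; max/Threshold
// (~9e-5) is smaller than min, so the function returns PPC::BR_NO_HINT.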
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc
         && isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) {
  SDLoc dl(SN);
  int FI = cast<FrameIndexSDNode>(N)->getIndex();
  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
  if (SN->hasOneUse())
    CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
                         getSmallIPtrImm(Offset, dl));
  else
    ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
                                           getSmallIPtrImm(Offset, dl)));
}
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
                                      bool isShiftMask, unsigned &SH,
                                      unsigned &MB, unsigned &ME) {
  // Don't even go down this path for i64, since different logic will be
  // necessary for rldicl/rldicr/rldimi.
  if (N->getValueType(0) != MVT::i32)
    return false;

  unsigned Shift = 32;
  unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
  unsigned Opcode = N->getOpcode();
  if (N->getNumOperands() != 2 ||
      !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
    return false;

  if (Opcode == ISD::SHL) {
    // apply shift left to mask if it comes first
    if (isShiftMask) Mask = Mask << Shift;
    // determine which bits are made indeterminant by shift
    Indeterminant = ~(0xFFFFFFFFu << Shift);
  } else if (Opcode == ISD::SRL) {
    // apply shift right to mask if it comes first
    if (isShiftMask) Mask = Mask >> Shift;
    // determine which bits are made indeterminant by shift
    Indeterminant = ~(0xFFFFFFFFu >> Shift);
    // adjust for the left rotate
    Shift = 32 - Shift;
  } else if (Opcode == ISD::ROTL) {
    Indeterminant = 0;
  } else {
    return false;
  }

  // if the mask doesn't intersect any Indeterminant bits
  if (Mask && !(Mask & Indeterminant)) {
    SH = Shift & 31;
    // make sure the mask is still a mask (wrap arounds may not be)
    return isRunOfOnes(Mask, MB, ME);
  }
  return false;
}
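// Illustrative example for isRotateAndMask (added, not in the original
// comments): for a node (srl x, 16) queried with Mask = 0x0000FFFF and
// isShiftMask = false, the SRL arm sets Indeterminant = 0xFFFF0000 and
// Shift = 32 - 16 = 16. The mask does not intersect the indeterminant bits,
// so SH = 16 and isRunOfOnes(0x0000FFFF) yields MB = 16, ME = 31 -- the
// rlwinm encoding of a 16-bit logical shift right.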
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  SDValue Base = ST->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = ST->getOffset();
  if (!Offset.isUndef())
    return false;
  if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
    return false;

  SDLoc dl(ST);
  EVT MemVT = ST->getMemoryVT();
  EVT RegVT = ST->getValue().getValueType();
  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8: {
    Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
    break;
  }
  case MVT::i16: {
    Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
    break;
  }
  case MVT::i32: {
    Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
    break;
  }
  case MVT::i64: {
    Opcode = PPC::STDXTLS;
    break;
  }
  }
  SDValue Chain = ST->getChain();
  SDVTList VTs = ST->getVTList();
  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
                   Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(ST, MN);
  ReplaceNode(ST, MN);
  return true;
}
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  SDValue Base = LD->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = LD->getOffset();
  if (!Offset.isUndef())
    return false;
  if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
    return false;

  SDLoc dl(LD);
  EVT MemVT = LD->getMemoryVT();
  EVT RegVT = LD->getValueType(0);
  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8: {
    Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
    break;
  }
  case MVT::i16: {
    Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
    break;
  }
  case MVT::i32: {
    Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
    break;
  }
  case MVT::i64: {
    Opcode = PPC::LDXTLS;
    break;
  }
  }
  SDValue Chain = LD->getChain();
  SDVTList VTs = LD->getVTList();
  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(LD, MN);
  ReplaceNode(LD, MN);
  return true;
}
/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDLoc dl(N);

  KnownBits LKnown = CurDAG->computeKnownBits(Op0);
  KnownBits RKnown = CurDAG->computeKnownBits(Op1);

  unsigned TargetMask = LKnown.Zero.getZExtValue();
  unsigned InsertMask = RKnown.Zero.getZExtValue();

  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
    unsigned Op0Opc = Op0.getOpcode();
    unsigned Op1Opc = Op1.getOpcode();
    unsigned Value, SH = 0;
    TargetMask = ~TargetMask;
    InsertMask = ~InsertMask;

    // If the LHS has a foldable shift and the RHS does not, then swap it to
    // the RHS so that we can fold the shift into the insert.
    if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
          Op0.getOperand(0).getOpcode() == ISD::SRL) {
        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
            Op1.getOperand(0).getOpcode() != ISD::SRL) {
          std::swap(Op0, Op1);
          std::swap(Op0Opc, Op1Opc);
          std::swap(TargetMask, InsertMask);
        }
      }
    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
          Op1.getOperand(0).getOpcode() != ISD::SRL) {
        std::swap(Op0, Op1);
        std::swap(Op0Opc, Op1Opc);
        std::swap(TargetMask, InsertMask);
      }
    }

    unsigned MB, ME;
    if (isRunOfOnes(InsertMask, MB, ME)) {
      if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
          isInt32Immediate(Op1.getOperand(1), Value)) {
        Op1 = Op1.getOperand(0);
        SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
      }
      if (Op1Opc == ISD::AND) {
        // The AND mask might not be a constant, and we need to make sure that
        // if we're going to fold the masking with the insert, all bits not
        // known to be zero in the mask are known to be one.
        KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
        bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();

        unsigned SHOpc = Op1.getOperand(0).getOpcode();
        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
            isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
          // Note that Value must be in range here (less than 32) because
          // otherwise there would not be any bits set in InsertMask.
          Op1 = Op1.getOperand(0).getOperand(0);
          SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
        }
      }

      SH &= 31;
      SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
                        getI32Imm(ME, dl) };
      ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
      return true;
    }
  }
  return false;
}
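// Worked example for tryBitfieldInsert (added for illustration, not part of
// the original comments): for (or (and x, 0xFFFFFF00), (and y, 0x000000FF))
// the known-zero masks of the two operands together cover all 32 bits,
// InsertMask becomes 0x000000FF, and isRunOfOnes gives MB = 24, ME = 31, so
// the or is selected roughly as RLWIMI x, y, 0, 24, 31 -- y's low byte is
// inserted into x.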
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
  unsigned MaxTruncation = 0;
  // Cannot use range-based for loop here as we need the actual use (i.e. we
  // need the operand number corresponding to the use). A range-based for
  // will unbox the use and provide an SDNode*.
  for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
       Use != UseEnd; ++Use) {
    unsigned Opc =
        Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
    switch (Opc) {
    default: return 0;
    case ISD::TRUNCATE:
      if (Use->isMachineOpcode())
        return 0;
      MaxTruncation =
          std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
      continue;
    case ISD::STORE: {
      if (Use->isMachineOpcode())
        return 0;
      StoreSDNode *STN = cast<StoreSDNode>(*Use);
      unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
      if (MemVTSize == 64 || Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, MemVTSize);
      continue;
    }
    case PPC::STW8:
    case PPC::STWX8:
    case PPC::STWU8:
    case PPC::STWUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 32u);
      continue;
    case PPC::STH8:
    case PPC::STHX8:
    case PPC::STHU8:
    case PPC::STHUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 16u);
      continue;
    case PPC::STB8:
    case PPC::STBX8:
    case PPC::STBU8:
    case PPC::STBUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 8u);
      continue;
    }
  }
  return MaxTruncation;
}
// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
// zeros and, if so, return the bit index one past the left end of that zero
// run (used later as a rotate amount); return 0 otherwise.
static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
  unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
  unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
  if ((HiTZ + LoLZ) >= Num)
    return (32 + HiTZ);
  return 0;
}
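// Illustrative example (added): for Imm = 0xFF00000000000000 the high word has
// 24 trailing zeros and the low word has 32 leading zeros, a run of 56 >= 49
// consecutive zeros, so the function returns 32 + 24 = 56. Rotating Imm right
// by 56 yields 0xFF, which fits in a sign-extended 16-bit immediate.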
// Direct materialization of 64-bit constants by enumerated patterns.
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
                                  uint64_t Imm, unsigned &InstCnt) {
  unsigned TZ = countTrailingZeros<uint64_t>(Imm);
  unsigned LZ = countLeadingZeros<uint64_t>(Imm);
  unsigned TO = countTrailingOnes<uint64_t>(Imm);
  unsigned LO = countLeadingOnes<uint64_t>(Imm);
  unsigned Hi32 = Hi_32(Imm);
  unsigned Lo32 = Lo_32(Imm);
  SDNode *Result = nullptr;
  unsigned Shift = 0;

  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  // The following patterns use one instruction to materialize the Imm.
  InstCnt = 1;
  // 1-1) Patterns : {zeros}{15-bit value}
  //                 {ones}{15-bit value}
  if (isInt<16>(Imm)) {
    SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
    return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  }
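  // Illustrative example (added): Imm = -32768 (0xFFFF...8000) satisfies
  // isInt<16> and is materialized with a single "LI8 -32768"; the instruction
  // sign-extends, producing the leading ones for free.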
  // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
  //                 {ones}{15-bit value}{16 zeros}
  if (TZ > 15 && (LZ > 32 || LO > 32))
    return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                  getI32Imm((Imm >> 16) & 0xffff));

  // The following patterns use two instructions to materialize the Imm.
  InstCnt = 2;
  assert(LZ < 64 && "Unexpected leading zeros here.");
  // Count of ones following the leading zeros.
  unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
  // 2-1) Patterns : {zeros}{31-bit value}
  //                 {ones}{31-bit value}
  if (isInt<32>(Imm)) {
    uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Imm & 0xffff));
  }
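  // Illustrative example (added): Imm = 0x12345678 is built with "LIS8 0x1234"
  // (giving 0x12340000) followed by "ORI8 0x5678".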
  // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
  //                 {zeros}{15-bit value}{zeros}
  //                 {zeros}{ones}{15-bit value}
  //                 {ones}{15-bit value}{zeros}
  // We can take advantage of LI's sign-extension semantics to generate leading
  // ones, and then use RLDIC to mask off the ones on both sides after rotation.
  if ((LZ + FO + TZ) > 48) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> TZ) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // 2-3) Pattern : {zeros}{15-bit value}{ones}
  // Shift right the Imm by (48 - LZ) bits to construct a negative 16-bit value,
  // so that we can take advantage of LI's sign-extension semantics, and then
  // mask the ones off after rotation.
  //
  // +--LZ--||-15-bit-||--TO--+     +-------------|--16-bit--+
  // |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //          Imm                   (Imm >> (48 - LZ) & 0xffff)
  // +----sext-----|--16-bit--+     +clear-|-----------------+
  // |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  // LI8: sext many leading zeros   RLDICL: rotate left (48 - LZ), clear left LZ
  if ((LZ + TO) > 48) {
    // Since the immediates with (LZ > 32) have been handled by previous
    // patterns, here we have (LZ <= 32) to make sure we will not shift right
    // the Imm by a negative value.
    assert(LZ <= 32 && "Unexpected shift value.");
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> (48 - LZ) & 0xffff)));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(48 - LZ), getI32Imm(LZ));
  }
  // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
  //                 {ones}{15-bit value}{ones}
  // We can take advantage of LI's sign-extension semantics to generate leading
  // ones, and then use RLDICL to mask off the ones on the left side (if
  // required) after rotation.
  //
  // +-LZ-FO||-15-bit-||--TO--+     +-------------|--16-bit--+
  // |00011110bbbbbbbbb1111111| ->  |000000000011110bbbbbbbbb|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //          Imm                        (Imm >> TO) & 0xffff
  // +----sext-----|--16-bit--+     +LZ|---------------------+
  // |111111111111110bbbbbbbbb| ->  |00011110bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  // LI8: sext many leading zeros   RLDICL: rotate left TO, clear left LZ
  if ((LZ + FO + TO) > 48) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> TO) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
  // If Hi32 is zero and the Lo16 (in Lo32) can be represented as a positive
  // 16-bit value, we can use LI for the Lo16 without generating leading ones,
  // and then add in the Hi16 (in Lo32).
  if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm(Lo32 & 0xffff));
    return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Lo32 >> 16));
  }
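  // Illustrative example (added): Imm = 0x0000000012340567 has bit 15 clear,
  // so it is built as "LI8 0x0567" followed by "ORIS8 0x1234", giving
  // 0x12340567 without any unwanted sign extension.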
  // 2-6) Patterns : {******}{49 zeros}{******}
  //                 {******}{49 ones}{******}
  // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
  // bits remain on both sides. Rotate right the Imm to construct an int<16>
  // value, use LI for the int<16> value and then use RLDICL without a mask to
  // rotate it back.
  //
  // 1) findContiguousZerosAtLeast(Imm, 49)
  // +------|--zeros-|------+     +---ones--||---15 bit--+
  // |bbbbbb0000000000aaaaaa| ->  |0000000000aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                    0      63                    0
  //
  // 2) findContiguousZerosAtLeast(~Imm, 49)
  // +------|--ones--|------+     +---ones--||---15 bit--+
  // |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                    0      63                    0
  if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
      (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
    uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm(RotImm & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Shift), getI32Imm(0));
  }

  // The following patterns use three instructions to materialize the Imm.
  InstCnt = 3;
  // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
  //                 {zeros}{31-bit value}{zeros}
  //                 {zeros}{ones}{31-bit value}
  //                 {ones}{31-bit value}{zeros}
  // We can take advantage of LIS's sign-extension semantics to generate leading
  // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
  // ones on both sides after rotation.
  if ((LZ + FO + TZ) > 32) {
    uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> TZ) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // 3-2) Pattern : {zeros}{31-bit value}{ones}
  // Shift right the Imm by (32 - LZ) bits to construct a negative 32-bit
  // value, so that we can take advantage of LIS's sign-extension semantics,
  // add the remaining bits with ORI, and then mask the ones off after rotation.
  // This is similar to Pattern 2-3, please refer to the diagram there.
  if ((LZ + TO) > 32) {
    // Since the immediates with (LZ > 32) have been handled by previous
    // patterns, here we have (LZ <= 32) to make sure we will not shift right
    // the Imm by a negative value.
    assert(LZ <= 32 && "Unexpected shift value.");
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                    getI32Imm((Imm >> (48 - LZ)) & 0xffff));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> (32 - LZ)) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(32 - LZ), getI32Imm(LZ));
  }
  // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
  //                 {ones}{31-bit value}{ones}
  // We can take advantage of LIS's sign-extension semantics to generate leading
  // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
  // ones on the left side (if required) after rotation.
  // This is similar to Pattern 2-4, please refer to the diagram there.
  if ((LZ + FO + TO) > 32) {
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                    getI32Imm((Imm >> (TO + 16)) & 0xffff));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> TO) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // 3-4) Patterns : High word == Low word
  if (Hi32 == Lo32) {
    // Handle the first 32 bits.
    uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(Lo32 & 0xffff));
    // Use rldimi to insert the Low word into the High word.
    SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
                     getI32Imm(0)};
    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  }
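  // Illustrative example (added): Imm = 0x1234567812345678 is built as
  // "LIS8 0x1234", "ORI8 0x5678" (low word now 0x12345678), and an RLDIMI with
  // SH = 32, MB = 0 that replicates the low word into the high word.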
  // 3-5) Patterns : {******}{33 zeros}{******}
  //                 {******}{33 ones}{******}
  // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
  // bits remain on both sides. Rotate right the Imm to construct an int<32>
  // value, use LIS + ORI for the int<32> value and then use RLDICL without a
  // mask to rotate it back.
  // This is similar to Pattern 2-6, please refer to the diagram there.
  if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
      (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
    uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
    uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(RotImm & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Shift), getI32Imm(0));
  }

  InstCnt = 0;
  return nullptr;
}
// Try to select instructions to generate a 64-bit immediate using prefixed as
// well as non-prefixed instructions. The function returns the SDNode that
// materializes the constant, or nullptr if it does not find one. The variable
// InstCnt is set to the number of instructions that were selected.
static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
                                        uint64_t Imm, unsigned &InstCnt) {
  unsigned TZ = countTrailingZeros<uint64_t>(Imm);
  unsigned LZ = countLeadingZeros<uint64_t>(Imm);
  unsigned TO = countTrailingOnes<uint64_t>(Imm);
  unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
  unsigned Hi32 = Hi_32(Imm);
  unsigned Lo32 = Lo_32(Imm);

  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  };

  // The following patterns use one instruction to materialize the Imm.
  InstCnt = 1;

  // The pli instruction can materialize up to 34 bits directly.
  // If a constant fits within 34 bits, emit the pli instruction here directly.
  if (isInt<34>(Imm))
    return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
                                  CurDAG->getTargetConstant(Imm, dl, MVT::i64));
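  // Illustrative example (added): any value in [-2^33, 2^33 - 1], for instance
  // 0x1FFFFFFFF, satisfies isInt<34> and is materialized with a single PLI8.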
  // Require at least two instructions.
  InstCnt = 2;
  SDNode *Result = nullptr;
  // Patterns : {zeros}{ones}{33-bit value}{zeros}
  //            {zeros}{33-bit value}{zeros}
  //            {zeros}{ones}{33-bit value}
  //            {ones}{33-bit value}{zeros}
  // We can take advantage of PLI's sign-extension semantics to generate leading
  // ones, and then use RLDIC to mask off the ones on both sides after rotation.
  if ((LZ + FO + TZ) > 30) {
    APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
    APInt Extended = SignedInt34.sext(64);
    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
                                    getI64Imm(*Extended.getRawData()));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // Pattern : {zeros}{33-bit value}{ones}
  // Shift right the Imm by (30 - LZ) bits to construct a negative 34-bit value,
  // so that we can take advantage of PLI's sign-extension semantics, and then
  // mask the ones off after rotation.
  //
  // +--LZ--||-33-bit-||--TO--+     +-------------|--34-bit--+
  // |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //
  // +----sext-----|--34-bit--+     +clear-|-----------------+
  // |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  if ((LZ + TO) > 30) {
    APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
    APInt Extended = SignedInt34.sext(64);
    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
                                    getI64Imm(*Extended.getRawData()));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(30 - LZ), getI32Imm(LZ));
  }
  // Patterns : {zeros}{ones}{33-bit value}{ones}
  //            {ones}{33-bit value}{ones}
  // Similar to LI we can take advantage of PLI's sign-extension semantics to
  // generate leading ones, and then use RLDICL to mask off the ones on the
  // left side (if required) after rotation.
  if ((LZ + FO + TO) > 30) {
    APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
    APInt Extended = SignedInt34.sext(64);
    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
                                    getI64Imm(*Extended.getRawData()));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // Patterns : {******}{31 zeros}{******}
  //          : {******}{31 ones}{******}
  // If the Imm contains 31 consecutive zeros/ones then the remaining bit count
  // is 33. Rotate right the Imm to construct an int<33> value; we can use PLI
  // for the int<33> value and then use RLDICL without a mask to rotate it back.
  //
  // +------|--ones--|------+     +---ones--||---33 bit--+
  // |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                    0      63                    0
  for (unsigned Shift = 0; Shift < 63; ++Shift) {
    uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
    if (isInt<34>(RotImm)) {
      Result =
          CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
      return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Shift),
                                    getI32Imm(0));
    }
  }

  // Patterns : High word == Low word
  // This is basically a splat of a 32-bit immediate.
  if (Hi32 == Lo32) {
    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
    SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
                     getI32Imm(0)};
    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  }

  InstCnt = 3;
  // Catch-all
  // This pattern can form any 64-bit immediate in 3 instructions.
  SDNode *ResultHi =
      CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
  SDNode *ResultLo =
      CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
  SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
                   getI32Imm(0)};
  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
}
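// Illustrative example for the catch-all above (added): for an arbitrary
// constant such as 0x123456789ABCDEF0 it emits "PLI8 0x12345678" and
// "PLI8 0x9ABCDEF0", then an RLDIMI that rotates the high-word register left
// by 32 and inserts it above the low word.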
static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
                            unsigned *InstCnt = nullptr) {
  unsigned InstCntDirect = 0;
  // No more than 3 instructions are used if we can select the i64 immediate
  // directly.
  SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);

  const PPCSubtarget &Subtarget =
      CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();

  // If we have prefixed instructions and there is a chance we can
  // materialize the constant with fewer prefixed instructions than
  // non-prefixed, try that.
  if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
    unsigned InstCntDirectP = 0;
    SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
    // Use the prefix case in either of two cases:
    // 1) We have no result from the non-prefix case to use.
    // 2) The non-prefix case uses more instructions than the prefix case.
    // If the prefix and non-prefix cases use the same number of instructions
    // we will prefer the non-prefix case.
    if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
      if (InstCnt)
        *InstCnt = InstCntDirectP;
      return ResultP;
    }
  }

  if (Result) {
    if (InstCnt)
      *InstCnt = InstCntDirect;
    return Result;
  }
  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  // Handle the upper 32-bit value.
  Result =
      selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
  // Add in the last bits as required.
  if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) {
    Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Hi16));
    ++InstCntDirect;
  }
  if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) {
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(Lo16));
    ++InstCntDirect;
  }
  if (InstCnt)
    *InstCnt = InstCntDirect;
  return Result;
}

// Select a 64-bit constant.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
  SDLoc dl(N);

  // Get the 64-bit value.
  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
  if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
    uint64_t SextImm = SignExtend64(Imm, MinSize);
    SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
    if (isInt<16>(SextImm))
      return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  }
  return selectI64Imm(CurDAG, dl, Imm);
}
namespace {

class BitPermutationSelector {
  struct ValueBit {
    SDValue V;

    // The bit number in the value, using a convention where bit 0 is the
    // lowest-order bit.
    unsigned Idx;

    // ConstZero means a bit we need to mask off.
    // Variable is a bit that comes from an input variable.
    // VariableKnownToBeZero is also a bit that comes from an input variable,
    // but it is known to be already zero, so we do not need to mask it.
    enum Kind {
      ConstZero,
      Variable,
      VariableKnownToBeZero
    } K;

    ValueBit(SDValue V, unsigned I, Kind K = Variable)
      : V(V), Idx(I), K(K) {}
    ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}

    bool isZero() const {
      return K == ConstZero || K == VariableKnownToBeZero;
    }

    bool hasValue() const {
      return K == Variable || K == VariableKnownToBeZero;
    }

    SDValue getValue() const {
      assert(hasValue() && "Cannot get the value of a constant bit");
      return V;
    }

    unsigned getValueBitIndex() const {
      assert(hasValue() && "Cannot get the value bit index of a constant bit");
      return Idx;
    }
  };
  // A bit group has the same underlying value and the same rotate factor.
  struct BitGroup {
    SDValue V;
    unsigned RLAmt;
    unsigned StartIdx, EndIdx;

    // This rotation amount assumes that the lower 32 bits of the quantity are
    // replicated in the high 32 bits by the rotation operator (which is done
    // by rlwinm and friends in 64-bit mode).
    bool Repl32;
    // Did converting to Repl32 == true change the rotation factor? If it did,
    // it decreased it by 32.
    bool Repl32CR;
    // Was this group coalesced after setting Repl32 to true?
    bool Repl32Coalesced;

    BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
      : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
        Repl32Coalesced(false) {
      LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
                        << " [" << S << ", " << E << "]\n");
    }
  };

  // Information on each (Value, RLAmt) pair (like the number of groups
  // associated with each) used to choose the lowering method.
  struct ValueRotInfo {
    SDValue V;
    unsigned RLAmt = std::numeric_limits<unsigned>::max();
    unsigned NumGroups = 0;
    unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
    bool Repl32 = false;

    ValueRotInfo() = default;

    // For sorting (in reverse order) by NumGroups, and then by
    // FirstGroupStartIdx.
    bool operator < (const ValueRotInfo &Other) const {
      // We need to sort so that the non-Repl32 come first because, when we're
      // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
      // masking operation.
      if (Repl32 < Other.Repl32)
        return true;
      else if (Repl32 > Other.Repl32)
        return false;
      else if (NumGroups > Other.NumGroups)
        return true;
      else if (NumGroups < Other.NumGroups)
        return false;
      else if (RLAmt == 0 && Other.RLAmt != 0)
        return true;
      else if (RLAmt != 0 && Other.RLAmt == 0)
        return false;
      else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
        return true;
      return false;
    }
  };
  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
  using ValueBitsMemoizer =
      DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
  ValueBitsMemoizer Memoizer;

  // Return a pair of bool and a SmallVector pointer to a memoization entry.
  // The bool is true if something interesting was deduced, otherwise if we're
  // providing only a generic representation of V (or something else likewise
  // uninteresting for instruction selection) through the SmallVector.
  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
                                                            unsigned NumBits) {
    auto &ValueEntry = Memoizer[V];
    if (ValueEntry)
      return std::make_pair(ValueEntry->first, &ValueEntry->second);
    ValueEntry.reset(new ValueBitsMemoizedValue());
    bool &Interesting = ValueEntry->first;
    SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
    Bits.resize(NumBits);

    switch (V.getOpcode()) {
    default: break;
    case ISD::ROTL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned RotAmt = V.getConstantOperandVal(1);
        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
        for (unsigned i = 0; i < NumBits; ++i)
          Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SHL:
    case PPCISD::SHL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);
        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
        for (unsigned i = ShiftAmt; i < NumBits; ++i)
          Bits[i] = LHSBits[i - ShiftAmt];
        for (unsigned i = 0; i < ShiftAmt; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);
        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SRL:
    case PPCISD::SRL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);
        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
          Bits[i] = LHSBits[i + ShiftAmt];
        for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);
        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::AND:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = V.getConstantOperandVal(1);
        const SmallVector<ValueBit, 64> *LHSBits;
        // Mark this as interesting, only if the LHS was also interesting. This
        // prevents the overall procedure from matching a single immediate 'and'
        // (which is non-optimal because such an and might be folded with other
        // things if we don't select it here).
        std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
        for (unsigned i = 0; i < NumBits; ++i)
          if (((Mask >> i) & 1) == 1)
            Bits[i] = (*LHSBits)[i];
          else {
            // AND instruction masks this bit. If the input is already zero,
            // we have nothing to do here. Otherwise, make the bit ConstZero.
            if ((*LHSBits)[i].isZero())
              Bits[i] = (*LHSBits)[i];
            else
              Bits[i] = ValueBit(ValueBit::ConstZero);
          }
        return std::make_pair(Interesting, &Bits);
      }
      break;
    case ISD::OR: {
      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
      const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;

      bool AllDisjoint = true;
      SDValue LastVal = SDValue();
      unsigned LastIdx = 0;
      for (unsigned i = 0; i < NumBits; ++i) {
        if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
          // If both inputs are known to be zero and one is ConstZero and
          // another is VariableKnownToBeZero, we can select whichever
          // we like. To minimize the number of bit groups, we select
          // VariableKnownToBeZero if this bit is the next bit of the same
          // input variable from the previous bit. Otherwise, we select
          // ConstZero.
          if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
              LHSBits[i].getValueBitIndex() == LastIdx + 1)
            Bits[i] = LHSBits[i];
          else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
                   RHSBits[i].getValueBitIndex() == LastIdx + 1)
            Bits[i] = RHSBits[i];
          else
            Bits[i] = ValueBit(ValueBit::ConstZero);
        } else if (LHSBits[i].isZero())
          Bits[i] = RHSBits[i];
        else if (RHSBits[i].isZero())
          Bits[i] = LHSBits[i];
        else {
          AllDisjoint = false;
          break;
        }
        // We remember the value and bit index of this bit.
        if (Bits[i].hasValue()) {
          LastVal = Bits[i].getValue();
          LastIdx = Bits[i].getValueBitIndex();
        } else {
          if (LastVal) LastVal = SDValue();
          LastIdx = 0;
        }
      }

      if (!AllDisjoint)
        break;

      return std::make_pair(Interesting = true, &Bits);
    }
    case ISD::ZERO_EXTEND: {
      // We support only the case with zero extension from i32 to i64 so far.
      if (V.getValueType() != MVT::i64 ||
          V.getOperand(0).getValueType() != MVT::i32)
        break;

      const SmallVector<ValueBit, 64> *LHSBits;
      const unsigned NumOperandBits = 32;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumOperandBits);

      for (unsigned i = 0; i < NumOperandBits; ++i)
        Bits[i] = (*LHSBits)[i];

      for (unsigned i = NumOperandBits; i < NumBits; ++i)
        Bits[i] = ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::TRUNCATE: {
      EVT FromType = V.getOperand(0).getValueType();
      EVT ToType = V.getValueType();
      // We support only the case with truncate from i64 to i32.
      if (FromType != MVT::i64 || ToType != MVT::i32)
        break;
      const unsigned NumAllBits = FromType.getSizeInBits();
      SmallVector<ValueBit, 64> *InBits;
      std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
                                                   NumAllBits);
      const unsigned NumValidBits = ToType.getSizeInBits();

      // A 32-bit instruction cannot touch the upper 32-bit part of a 64-bit
      // value, so we cannot include this truncate.
      bool UseUpper32bit = false;
      for (unsigned i = 0; i < NumValidBits; ++i)
        if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
          UseUpper32bit = true;
          break;
        }
      if (UseUpper32bit)
        break;

      for (unsigned i = 0; i < NumValidBits; ++i)
        Bits[i] = (*InBits)[i];

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::AssertZext: {
      // For AssertZext, we look through the operand and
      // mark the bits known to be zero.
      const SmallVector<ValueBit, 64> *LHSBits;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumBits);

      EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
      const unsigned NumValidBits = FromType.getSizeInBits();
      for (unsigned i = 0; i < NumValidBits; ++i)
        Bits[i] = (*LHSBits)[i];

      // These bits are known to be zero but the AssertZext may be from a value
      // that already has some constant zero bits (i.e. from a masking and).
      for (unsigned i = NumValidBits; i < NumBits; ++i)
        Bits[i] = (*LHSBits)[i].hasValue()
                      ? ValueBit((*LHSBits)[i].getValue(),
                                 (*LHSBits)[i].getValueBitIndex(),
                                 ValueBit::VariableKnownToBeZero)
                      : ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::LOAD:
      LoadSDNode *LD = cast<LoadSDNode>(V);
      if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
        EVT VT = LD->getMemoryVT();
        const unsigned NumValidBits = VT.getSizeInBits();

        for (unsigned i = 0; i < NumValidBits; ++i)
          Bits[i] = ValueBit(V, i);

        // These bits are known to be zero.
        for (unsigned i = NumValidBits; i < NumBits; ++i)
          Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);

        // A zero-extending load itself cannot be optimized, so it is not
        // interesting by itself even though it gives useful information.
        return std::make_pair(Interesting = false, &Bits);
      }
      break;
    }

    for (unsigned i = 0; i < NumBits; ++i)
      Bits[i] = ValueBit(V, i);

    return std::make_pair(Interesting = false, &Bits);
  }
  // For each value (except the constant ones), compute the left-rotate amount
  // to get it from its original to final position.
  void computeRotationAmounts() {
    NeedMask = false;
    RLAmt.resize(Bits.size());
    for (unsigned i = 0; i < Bits.size(); ++i)
      if (Bits[i].hasValue()) {
        unsigned VBI = Bits[i].getValueBitIndex();
        if (i >= VBI)
          RLAmt[i] = i - VBI;
        else
          RLAmt[i] = Bits.size() - (VBI - i);
      } else if (Bits[i].isZero()) {
        NeedMask = true;
        RLAmt[i] = UINT32_MAX;
      } else {
        llvm_unreachable("Unknown value bit type");
      }
  }
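  // Illustrative example (added): in 32-bit mode, if final bit 8 comes from
  // bit 0 of V the rotate amount is 8; if final bit 0 comes from bit 24 of V
  // the amount is 32 - 24 = 8 as well, so both bits land in the same
  // (V, RLAmt) bit group.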
  // Collect groups of consecutive bits with the same underlying value and
  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
  // they break up groups.
  void collectBitGroups(bool LateMask) {
    BitGroups.clear();

    unsigned LastRLAmt = RLAmt[0];
    SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
    unsigned LastGroupStartIdx = 0;
    bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    for (unsigned i = 1; i < Bits.size(); ++i) {
      unsigned ThisRLAmt = RLAmt[i];
      SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
      if (LateMask && !ThisValue) {
        ThisValue = LastValue;
        ThisRLAmt = LastRLAmt;
        // If we're doing late masking, then the first bit group always starts
        // at zero (even if the first bits were zero).
        if (BitGroups.empty())
          LastGroupStartIdx = 0;
      }

      // If this bit is known to be zero and the current group is a bit group
      // of zeros, we do not need to terminate the current bit group even if
      // the Value or RLAmt does not match here. Instead, we terminate this
      // group when the first non-zero bit appears later.
      if (IsGroupOfZeros && Bits[i].isZero())
        continue;

      // If this bit has the same underlying value and the same rotate factor
      // as the last one, then they're part of the same group.
      if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
        // We cannot continue the current group if this bit is not known to
        // be zero in a bit group of zeros.
        if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
          continue;

      if (LastValue.getNode())
        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                     i-1));
      LastRLAmt = ThisRLAmt;
      LastValue = ThisValue;
      LastGroupStartIdx = i;
      IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    }
    if (LastValue.getNode())
      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                   Bits.size()-1));

    if (BitGroups.empty())
      return;

    // We might be able to combine the first and last groups.
    if (BitGroups.size() > 1) {
      // If the first and last groups are the same, then remove the first group
      // in favor of the last group, making the ending index of the last group
      // equal to the ending index of the to-be-removed first group.
      if (BitGroups[0].StartIdx == 0 &&
          BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
          BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
          BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
        LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
        BitGroups.erase(BitGroups.begin());
      }
    }
  }
  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
  // associated with each. If the number of groups is the same, we prefer a
  // group which does not require a rotate (i.e. RLAmt is 0) to avoid the first
  // rotate instruction. If there is a degeneracy, pick the one that occurs
  // first (in the final value).
  void collectValueRotInfo() {
    ValueRots.clear();

    for (auto &BG : BitGroups) {
      unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
      ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
      VRI.V = BG.V;
      VRI.RLAmt = BG.RLAmt;
      VRI.Repl32 = BG.Repl32;
      VRI.NumGroups += 1;
      VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
    }

    // Now that we've collected the various ValueRotInfo instances, we need to
    // sort them.
    ValueRotsVec.clear();
    for (auto &I : ValueRots) {
      ValueRotsVec.push_back(I.second);
    }
    llvm::sort(ValueRotsVec);
  }
  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32 bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups. However, when they can be used,
  // the 32-bit replication can be used to represent, as a single bit group,
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible.
  void assignRepl32BitGroups() {
    // If we have bits like this:
    //
    // Indices:    15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
    // V bits: ...  7  6  5  4  3  2  1  0 31 30 29 28 27 26 25 24
    // Groups:     |      RLAmt = 8       |       RLAmt = 40      |
    //
    // But, making use of a 32-bit operation that replicates the low-order 32
    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
    // of 8.
    auto IsAllLow32 = [this](BitGroup & BG) {
      if (BG.StartIdx <= BG.EndIdx) {
        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      } else {
        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      }
      return true;
    };

    for (auto &BG : BitGroups) {
      // If this bit group has RLAmt of 0 and will not be merged with
      // another bit group, we don't benefit from Repl32. We don't mark
      // such a group to give more freedom for later instruction selection.
      if (BG.RLAmt == 0) {
        auto PotentiallyMerged = [this](BitGroup & BG) {
          for (auto &BG2 : BitGroups)
            if (&BG != &BG2 && BG.V == BG2.V &&
                (BG2.RLAmt == 0 || BG2.RLAmt == 32))
              return true;
          return false;
        };
        if (!PotentiallyMerged(BG))
          continue;
      }
      if (BG.StartIdx < 32 && BG.EndIdx < 32) {
        if (IsAllLow32(BG)) {
          if (BG.RLAmt >= 32) {
            BG.RLAmt -= 32;
            BG.Repl32CR = true;
          }
          BG.Repl32 = true;

          LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
                            << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
                            << BG.StartIdx << ", " << BG.EndIdx << "]\n");
        }
      }
    }
    // Now walk through the bit groups, consolidating where possible.
    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
      // We might want to remove this bit group by merging it with the previous
      // group (which might be the ending group).
      auto IP = (I == BitGroups.begin()) ?
                std::prev(BitGroups.end()) : std::prev(I);
      if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
          I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {

        LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
                          << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
                          << I->StartIdx << ", " << I->EndIdx
                          << "] with group with range [" << IP->StartIdx << ", "
                          << IP->EndIdx << "]\n");

        IP->EndIdx = I->EndIdx;
        IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
        IP->Repl32Coalesced = true;
        I = BitGroups.erase(I);
        continue;
      } else {
        // There is a special case worth handling: If there is a single group
        // covering the entire upper 32 bits, and it can be merged with both
        // the next and previous groups (which might be the same group), then
        // do so. If it is the same group (so there will be only one group in
        // total), then we need to reverse the order of the range so that it
        // covers the entire 64 bits.
        if (I->StartIdx == 32 && I->EndIdx == 63) {
          assert(std::next(I) == BitGroups.end() &&
                 "bit group ends at index 63 but there is another?");
          auto IN = BitGroups.begin();

          if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
              (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
              IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
              IsAllLow32(*I)) {

            LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
                              << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
                              << ", " << I->EndIdx
                              << "] with 32-bit replicated groups with ranges ["
                              << IP->StartIdx << ", " << IP->EndIdx << "] and ["
                              << IN->StartIdx << ", " << IN->EndIdx << "]\n");

            if (IP == IN) {
              // There is only one other group; change it to cover the whole
              // range (backward, so that it can still be Repl32 but cover the
              // whole 64-bit range).
              IP->StartIdx = 31;
              IP->EndIdx = 30;
              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
            } else {
              // There are two separate groups, one before this group and one
              // after us (at the beginning). We're going to remove this group,
              // but also the group at the very beginning.
              IP->EndIdx = IN->EndIdx;
              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
              BitGroups.erase(BitGroups.begin());
            }

            // This must be the last group in the vector (and we might have
            // just invalidated the iterator above), so break here.
            break;
          }
        }
      }

      ++I;
    }
  }
  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  uint64_t getZerosMask() {
    uint64_t Mask = 0;
    for (unsigned i = 0; i < Bits.size(); ++i) {
      if (Bits[i].hasValue())
        continue;
      Mask |= (UINT64_C(1) << i);
    }
    return ~Mask;
  }
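  // Illustrative example (added): if only the low byte of the result carries
  // value bits and the remaining bits are zeros, Mask accumulates
  // 0xFFFFFFFFFFFFFF00 and getZerosMask() returns 0x00000000000000FF, the AND
  // mask that keeps the valued bits.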
  // This method extends an input value to 64 bits if the input is a 32-bit
  // integer. While selecting instructions in BitPermutationSelector in 64-bit
  // mode, an input value can be a 32-bit integer if a ZERO_EXTEND node is
  // included. In such a case, we extend it to 64 bits to be consistent with
  // the other values.
  SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
    if (V.getValueSizeInBits() == 64)
      return V;

    assert(V.getValueSizeInBits() == 32);

    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
    SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
                                                   MVT::i64), 0);
    SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
                                                    MVT::i64, ImDef, V,
                                                    SubRegIdx), 0);
    return ExtVal;
  }

  SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
    if (V.getValueSizeInBits() == 32)
      return V;

    assert(V.getValueSizeInBits() == 64);

    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
    SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
                                                    MVT::i32, V, SubRegIdx), 0);
    return SubVal;
  }
  1790. // Depending on the number of groups for a particular value, it might be
  1791. // better to rotate, mask explicitly (using andi/andis), and then or the
  1792. // result. Select this part of the result first.
  1793. void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
  1794. if (BPermRewriterNoMasking)
  1795. return;
  1796. for (ValueRotInfo &VRI : ValueRotsVec) {
  1797. unsigned Mask = 0;
  1798. for (unsigned i = 0; i < Bits.size(); ++i) {
  1799. if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
  1800. continue;
  1801. if (RLAmt[i] != VRI.RLAmt)
  1802. continue;
  1803. Mask |= (1u << i);
  1804. }
  1805. // Compute the masks for andi/andis that would be necessary.
  1806. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
  1807. assert((ANDIMask != 0 || ANDISMask != 0) &&
  1808. "No set bits in mask for value bit groups");
  1809. bool NeedsRotate = VRI.RLAmt != 0;
  1810. // We're trying to minimize the number of instructions. If we have one
  1811. // group, using one of andi/andis can break even. If we have three
  1812. // groups, we can use both andi and andis and break even (to use both
  1813. // andi and andis we also need to or the results together). We need four
  1814. // groups if we also need to rotate. To use andi/andis we need to do more
  1815. // than break even because rotate-and-mask instructions tend to be easier
  1816. // to schedule.
  1817. // FIXME: We've biased here against using andi/andis, which is right for
  1818. // POWER cores, but not optimal everywhere. For example, on the A2,
  1819. // andi/andis have single-cycle latency whereas the rotate-and-mask
  1820. // instructions take two cycles, and it would be better to bias toward
  1821. // andi/andis in break-even cases.
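// The count below is: one rotate (if needed), one andi, one andis, an OR to
// combine them when both masks are used, and an OR to merge into an existing
// result.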
  1822. unsigned NumAndInsts = (unsigned) NeedsRotate +
  1823. (unsigned) (ANDIMask != 0) +
  1824. (unsigned) (ANDISMask != 0) +
  1825. (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
  1826. (unsigned) (bool) Res;
  1827. LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
  1828. << " RL: " << VRI.RLAmt << ":"
  1829. << "\n\t\t\tisel using masking: " << NumAndInsts
  1830. << " using rotates: " << VRI.NumGroups << "\n");
  1831. if (NumAndInsts >= VRI.NumGroups)
  1832. continue;
  1833. LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
  1834. if (InstCnt) *InstCnt += NumAndInsts;
  1835. SDValue VRot;
  1836. if (VRI.RLAmt) {
  1837. SDValue Ops[] =
  1838. { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
  1839. getI32Imm(0, dl), getI32Imm(31, dl) };
  1840. VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  1841. Ops), 0);
  1842. } else {
  1843. VRot = TruncateToInt32(VRI.V, dl);
  1844. }
  1845. SDValue ANDIVal, ANDISVal;
  1846. if (ANDIMask != 0)
  1847. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
  1848. VRot, getI32Imm(ANDIMask, dl)),
  1849. 0);
  1850. if (ANDISMask != 0)
  1851. ANDISVal =
  1852. SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
  1853. getI32Imm(ANDISMask, dl)),
  1854. 0);
  1855. SDValue TotalVal;
  1856. if (!ANDIVal)
  1857. TotalVal = ANDISVal;
  1858. else if (!ANDISVal)
  1859. TotalVal = ANDIVal;
  1860. else
  1861. TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
  1862. ANDIVal, ANDISVal), 0);
  1863. if (!Res)
  1864. Res = TotalVal;
  1865. else
  1866. Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
  1867. Res, TotalVal), 0);
  1868. // Now, remove all groups with this underlying value and rotation
  1869. // factor.
  1870. eraseMatchingBitGroups([VRI](const BitGroup &BG) {
  1871. return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
  1872. });
  1873. }
  1874. }
  1875. // Instruction selection for the 32-bit case.
  1876. SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
  1877. SDLoc dl(N);
  1878. SDValue Res;
  1879. if (InstCnt) *InstCnt = 0;
  1880. // Take care of cases that should use andi/andis first.
  1881. SelectAndParts32(dl, Res, InstCnt);
  1882. // If we've not yet selected a 'starting' instruction, and we have no zeros
  1883. // to fill in, select the (Value, RLAmt) with the highest priority (largest
  1884. // number of groups), and start with this rotated value.
  1885. if ((!NeedMask || LateMask) && !Res) {
  1886. ValueRotInfo &VRI = ValueRotsVec[0];
  1887. if (VRI.RLAmt) {
  1888. if (InstCnt) *InstCnt += 1;
  1889. SDValue Ops[] =
  1890. { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
  1891. getI32Imm(0, dl), getI32Imm(31, dl) };
  1892. Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
  1893. 0);
  1894. } else {
  1895. Res = TruncateToInt32(VRI.V, dl);
  1896. }
  1897. // Now, remove all groups with this underlying value and rotation factor.
  1898. eraseMatchingBitGroups([VRI](const BitGroup &BG) {
  1899. return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
  1900. });
  1901. }
  1902. if (InstCnt) *InstCnt += BitGroups.size();
  1903. // Insert the other groups (one at a time).
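// Note that bit indices here count from the LSB, while the rlwinm/rlwimi mask
// bounds count from the MSB, hence the (Bits.size() - Idx - 1) conversions.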
  1904. for (auto &BG : BitGroups) {
  1905. if (!Res) {
  1906. SDValue Ops[] =
  1907. { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
  1908. getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
  1909. getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
  1910. Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  1911. } else {
  1912. SDValue Ops[] =
  1913. { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
  1914. getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
  1915. getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
  1916. Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
  1917. }
  1918. }
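// With late masking, the zeros were ignored while inserting the groups above;
// mask them out now with andi (low halfword) and/or andis (high halfword),
// ORing the two results together if both are needed.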
  1919. if (LateMask) {
  1920. unsigned Mask = (unsigned) getZerosMask();
  1921. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
  1922. assert((ANDIMask != 0 || ANDISMask != 0) &&
  1923. "No set bits in zeros mask?");
  1924. if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
  1925. (unsigned) (ANDISMask != 0) +
  1926. (unsigned) (ANDIMask != 0 && ANDISMask != 0);
  1927. SDValue ANDIVal, ANDISVal;
  1928. if (ANDIMask != 0)
  1929. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
  1930. Res, getI32Imm(ANDIMask, dl)),
  1931. 0);
  1932. if (ANDISMask != 0)
  1933. ANDISVal =
  1934. SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
  1935. getI32Imm(ANDISMask, dl)),
  1936. 0);
  1937. if (!ANDIVal)
  1938. Res = ANDISVal;
  1939. else if (!ANDISVal)
  1940. Res = ANDIVal;
  1941. else
  1942. Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
  1943. ANDIVal, ANDISVal), 0);
  1944. }
  1945. return Res.getNode();
  1946. }
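// Return the number of instructions (one or two) that SelectRotMask64 (or
// SelectRotMaskIns64 when IsIns is true) would need for this rotate-and-mask.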
  1947. unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
  1948. unsigned MaskStart, unsigned MaskEnd,
  1949. bool IsIns) {
  1950. // In the notation used by the instructions, 'start' and 'end' are reversed
  1951. // because bits are counted from high to low order.
  1952. unsigned InstMaskStart = 64 - MaskEnd - 1,
  1953. InstMaskEnd = 64 - MaskStart - 1;
  1954. if (Repl32)
  1955. return 1;
  1956. if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
  1957. InstMaskEnd == 63 - RLAmt)
  1958. return 1;
  1959. return 2;
  1960. }
  1961. // For 64-bit values, not all combinations of rotates and masks are
  1962. // available. Produce one if it is available.
  1963. SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
  1964. bool Repl32, unsigned MaskStart, unsigned MaskEnd,
  1965. unsigned *InstCnt = nullptr) {
  1966. // In the notation used by the instructions, 'start' and 'end' are reversed
  1967. // because bits are counted from high to low order.
  1968. unsigned InstMaskStart = 64 - MaskEnd - 1,
  1969. InstMaskEnd = 64 - MaskStart - 1;
  1970. if (InstCnt) *InstCnt += 1;
  1971. if (Repl32) {
  1972. // This rotation amount assumes that the lower 32 bits of the quantity
  1973. // are replicated in the high 32 bits by the rotation operator (which is
  1974. // done by rlwinm and friends).
  1975. assert(InstMaskStart >= 32 && "Mask cannot start out of range");
  1976. assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
  1977. SDValue Ops[] =
  1978. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1979. getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
  1980. return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
  1981. Ops), 0);
  1982. }
  1983. if (InstMaskEnd == 63) {
  1984. SDValue Ops[] =
  1985. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1986. getI32Imm(InstMaskStart, dl) };
  1987. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
  1988. }
  1989. if (InstMaskStart == 0) {
  1990. SDValue Ops[] =
  1991. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1992. getI32Imm(InstMaskEnd, dl) };
  1993. return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
  1994. }
  1995. if (InstMaskEnd == 63 - RLAmt) {
  1996. SDValue Ops[] =
  1997. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1998. getI32Imm(InstMaskStart, dl) };
  1999. return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
  2000. }
  2001. // We cannot do this with a single instruction, so we'll use two. The
  2002. // problem is that we're not free to choose both a rotation amount and mask
  2003. // start and end independently. We can choose an arbitrary mask start and
  2004. // end, but then the rotation amount is fixed. Rotation, however, can be
  2005. // inverted, and so by applying an "inverse" rotation first, we can get the
  2006. // desired result.
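// Choosing RLAmt2 == MaskStart makes InstMaskEnd == 63 - RLAmt2 for the
// second step, which is exactly the rldic-encodable form; the remainder of
// the rotation is folded into the first step.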
  2007. if (InstCnt) *InstCnt += 1;
2008. // The rotation amount for the second instruction must be MaskStart.
  2009. unsigned RLAmt2 = MaskStart;
  2010. // The first instruction must rotate V so that the overall rotation amount
  2011. // is RLAmt.
  2012. unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
  2013. if (RLAmt1)
  2014. V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
  2015. return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
  2016. }
  2017. // For 64-bit values, not all combinations of rotates and masks are
  2018. // available. Produce a rotate-mask-and-insert if one is available.
  2019. SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
  2020. unsigned RLAmt, bool Repl32, unsigned MaskStart,
  2021. unsigned MaskEnd, unsigned *InstCnt = nullptr) {
  2022. // In the notation used by the instructions, 'start' and 'end' are reversed
  2023. // because bits are counted from high to low order.
  2024. unsigned InstMaskStart = 64 - MaskEnd - 1,
  2025. InstMaskEnd = 64 - MaskStart - 1;
  2026. if (InstCnt) *InstCnt += 1;
  2027. if (Repl32) {
  2028. // This rotation amount assumes that the lower 32 bits of the quantity
  2029. // are replicated in the high 32 bits by the rotation operator (which is
  2030. // done by rlwinm and friends).
  2031. assert(InstMaskStart >= 32 && "Mask cannot start out of range");
  2032. assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
  2033. SDValue Ops[] =
  2034. { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  2035. getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
  2036. return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
  2037. Ops), 0);
  2038. }
  2039. if (InstMaskEnd == 63 - RLAmt) {
  2040. SDValue Ops[] =
  2041. { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  2042. getI32Imm(InstMaskStart, dl) };
  2043. return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
  2044. }
  2045. // We cannot do this with a single instruction, so we'll use two. The
  2046. // problem is that we're not free to choose both a rotation amount and mask
  2047. // start and end independently. We can choose an arbitrary mask start and
  2048. // end, but then the rotation amount is fixed. Rotation, however, can be
  2049. // inverted, and so by applying an "inverse" rotation first, we can get the
  2050. // desired result.
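// Same two-step approach as in SelectRotMask64: pre-rotate the value so that
// the final rldimi can use RLAmt2 == MaskStart.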
  2051. if (InstCnt) *InstCnt += 1;
2052. // The rotation amount for the second instruction must be MaskStart.
  2053. unsigned RLAmt2 = MaskStart;
  2054. // The first instruction must rotate V so that the overall rotation amount
  2055. // is RLAmt.
  2056. unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
  2057. if (RLAmt1)
  2058. V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
  2059. return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
  2060. }
  2061. void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
  2062. if (BPermRewriterNoMasking)
  2063. return;
  2064. // The idea here is the same as in the 32-bit version, but with additional
  2065. // complications from the fact that Repl32 might be true. Because we
  2066. // aggressively convert bit groups to Repl32 form (which, for small
  2067. // rotation factors, involves no other change), and then coalesce, it might
  2068. // be the case that a single 64-bit masking operation could handle both
  2069. // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2070. // form allowed coalescing, then we must use a 32-bit rotation in order to
  2071. // completely capture the new combined bit group.
  2072. for (ValueRotInfo &VRI : ValueRotsVec) {
  2073. uint64_t Mask = 0;
  2074. // We need to add to the mask all bits from the associated bit groups.
  2075. // If Repl32 is false, we need to add bits from bit groups that have
2076. // Repl32 true, but are trivially convertible to Repl32 false. Such a
2077. // group is trivially convertible if it overlaps only with the lower 32
  2078. // bits, and the group has not been coalesced.
  2079. auto MatchingBG = [VRI](const BitGroup &BG) {
  2080. if (VRI.V != BG.V)
  2081. return false;
  2082. unsigned EffRLAmt = BG.RLAmt;
  2083. if (!VRI.Repl32 && BG.Repl32) {
  2084. if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
  2085. !BG.Repl32Coalesced) {
  2086. if (BG.Repl32CR)
  2087. EffRLAmt += 32;
  2088. } else {
  2089. return false;
  2090. }
  2091. } else if (VRI.Repl32 != BG.Repl32) {
  2092. return false;
  2093. }
  2094. return VRI.RLAmt == EffRLAmt;
  2095. };
  2096. for (auto &BG : BitGroups) {
  2097. if (!MatchingBG(BG))
  2098. continue;
  2099. if (BG.StartIdx <= BG.EndIdx) {
  2100. for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
  2101. Mask |= (UINT64_C(1) << i);
  2102. } else {
  2103. for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
  2104. Mask |= (UINT64_C(1) << i);
  2105. for (unsigned i = 0; i <= BG.EndIdx; ++i)
  2106. Mask |= (UINT64_C(1) << i);
  2107. }
  2108. }
  2109. // We can use the 32-bit andi/andis technique if the mask does not
  2110. // require any higher-order bits. This can save an instruction compared
  2111. // to always using the general 64-bit technique.
  2112. bool Use32BitInsts = isUInt<32>(Mask);
  2113. // Compute the masks for andi/andis that would be necessary.
  2114. unsigned ANDIMask = (Mask & UINT16_MAX),
  2115. ANDISMask = (Mask >> 16) & UINT16_MAX;
  2116. bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
  2117. unsigned NumAndInsts = (unsigned) NeedsRotate +
  2118. (unsigned) (bool) Res;
  2119. unsigned NumOfSelectInsts = 0;
  2120. selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
  2121. assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
  2122. if (Use32BitInsts)
  2123. NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
  2124. (unsigned) (ANDIMask != 0 && ANDISMask != 0);
  2125. else
  2126. NumAndInsts += NumOfSelectInsts + /* and */ 1;
  2127. unsigned NumRLInsts = 0;
  2128. bool FirstBG = true;
  2129. bool MoreBG = false;
  2130. for (auto &BG : BitGroups) {
  2131. if (!MatchingBG(BG)) {
  2132. MoreBG = true;
  2133. continue;
  2134. }
  2135. NumRLInsts +=
  2136. SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
  2137. !FirstBG);
  2138. FirstBG = false;
  2139. }
  2140. LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
  2141. << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
  2142. << "\n\t\t\tisel using masking: " << NumAndInsts
  2143. << " using rotates: " << NumRLInsts << "\n");
  2144. // When we'd use andi/andis, we bias toward using the rotates (andi only
  2145. // has a record form, and is cracked on POWER cores). However, when using
  2146. // general 64-bit constant formation, bias toward the constant form,
  2147. // because that exposes more opportunities for CSE.
  2148. if (NumAndInsts > NumRLInsts)
  2149. continue;
2150. // When merging multiple bit groups, an OR instruction is needed.
2151. // But when using rotates, rldimi can insert the rotated value into any
2152. // register, so the OR can be avoided.
  2153. if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
  2154. continue;
  2155. LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
  2156. if (InstCnt) *InstCnt += NumAndInsts;
  2157. SDValue VRot;
  2158. // We actually need to generate a rotation if we have a non-zero rotation
  2159. // factor or, in the Repl32 case, if we care about any of the
  2160. // higher-order replicated bits. In the latter case, we generate a mask
  2161. // backward so that it actually includes the entire 64 bits.
  2162. if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
  2163. VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
  2164. VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
  2165. else
  2166. VRot = VRI.V;
  2167. SDValue TotalVal;
  2168. if (Use32BitInsts) {
  2169. assert((ANDIMask != 0 || ANDISMask != 0) &&
  2170. "No set bits in mask when using 32-bit ands for 64-bit value");
  2171. SDValue ANDIVal, ANDISVal;
  2172. if (ANDIMask != 0)
  2173. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
  2174. ExtendToInt64(VRot, dl),
  2175. getI32Imm(ANDIMask, dl)),
  2176. 0);
  2177. if (ANDISMask != 0)
  2178. ANDISVal =
  2179. SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
  2180. ExtendToInt64(VRot, dl),
  2181. getI32Imm(ANDISMask, dl)),
  2182. 0);
  2183. if (!ANDIVal)
  2184. TotalVal = ANDISVal;
  2185. else if (!ANDISVal)
  2186. TotalVal = ANDIVal;
  2187. else
  2188. TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  2189. ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
  2190. } else {
  2191. TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
  2192. TotalVal =
  2193. SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
  2194. ExtendToInt64(VRot, dl), TotalVal),
  2195. 0);
  2196. }
  2197. if (!Res)
  2198. Res = TotalVal;
  2199. else
  2200. Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  2201. ExtendToInt64(Res, dl), TotalVal),
  2202. 0);
  2203. // Now, remove all groups with this underlying value and rotation
  2204. // factor.
  2205. eraseMatchingBitGroups(MatchingBG);
  2206. }
  2207. }
  2208. // Instruction selection for the 64-bit case.
  2209. SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
  2210. SDLoc dl(N);
  2211. SDValue Res;
  2212. if (InstCnt) *InstCnt = 0;
  2213. // Take care of cases that should use andi/andis first.
  2214. SelectAndParts64(dl, Res, InstCnt);
  2215. // If we've not yet selected a 'starting' instruction, and we have no zeros
  2216. // to fill in, select the (Value, RLAmt) with the highest priority (largest
  2217. // number of groups), and start with this rotated value.
  2218. if ((!NeedMask || LateMask) && !Res) {
  2219. // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
  2220. // groups will come first, and so the VRI representing the largest number
2221. // of groups might not be first (it might be the first of the Repl32 groups).
  2222. unsigned MaxGroupsIdx = 0;
  2223. if (!ValueRotsVec[0].Repl32) {
  2224. for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
  2225. if (ValueRotsVec[i].Repl32) {
  2226. if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
  2227. MaxGroupsIdx = i;
  2228. break;
  2229. }
  2230. }
  2231. ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
  2232. bool NeedsRotate = false;
  2233. if (VRI.RLAmt) {
  2234. NeedsRotate = true;
  2235. } else if (VRI.Repl32) {
  2236. for (auto &BG : BitGroups) {
  2237. if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
  2238. BG.Repl32 != VRI.Repl32)
  2239. continue;
  2240. // We don't need a rotate if the bit group is confined to the lower
  2241. // 32 bits.
  2242. if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
  2243. continue;
  2244. NeedsRotate = true;
  2245. break;
  2246. }
  2247. }
  2248. if (NeedsRotate)
  2249. Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
  2250. VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
  2251. InstCnt);
  2252. else
  2253. Res = VRI.V;
  2254. // Now, remove all groups with this underlying value and rotation factor.
  2255. if (Res)
  2256. eraseMatchingBitGroups([VRI](const BitGroup &BG) {
  2257. return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
  2258. BG.Repl32 == VRI.Repl32;
  2259. });
  2260. }
  2261. // Because 64-bit rotates are more flexible than inserts, we might have a
  2262. // preference regarding which one we do first (to save one instruction).
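// Move to the front the first group whose stand-alone rotate form is cheaper
// than its insert form, so that it is emitted first as a plain rotate-and-mask
// rather than as an insert.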
  2263. if (!Res)
  2264. for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
  2265. if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
  2266. false) <
  2267. SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
  2268. true)) {
  2269. if (I != BitGroups.begin()) {
  2270. BitGroup BG = *I;
  2271. BitGroups.erase(I);
  2272. BitGroups.insert(BitGroups.begin(), BG);
  2273. }
  2274. break;
  2275. }
  2276. }
  2277. // Insert the other groups (one at a time).
  2278. for (auto &BG : BitGroups) {
  2279. if (!Res)
  2280. Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
  2281. BG.EndIdx, InstCnt);
  2282. else
  2283. Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
  2284. BG.StartIdx, BG.EndIdx, InstCnt);
  2285. }
  2286. if (LateMask) {
  2287. uint64_t Mask = getZerosMask();
  2288. // We can use the 32-bit andi/andis technique if the mask does not
  2289. // require any higher-order bits. This can save an instruction compared
  2290. // to always using the general 64-bit technique.
  2291. bool Use32BitInsts = isUInt<32>(Mask);
  2292. // Compute the masks for andi/andis that would be necessary.
  2293. unsigned ANDIMask = (Mask & UINT16_MAX),
  2294. ANDISMask = (Mask >> 16) & UINT16_MAX;
  2295. if (Use32BitInsts) {
  2296. assert((ANDIMask != 0 || ANDISMask != 0) &&
  2297. "No set bits in mask when using 32-bit ands for 64-bit value");
  2298. if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
  2299. (unsigned) (ANDISMask != 0) +
  2300. (unsigned) (ANDIMask != 0 && ANDISMask != 0);
  2301. SDValue ANDIVal, ANDISVal;
  2302. if (ANDIMask != 0)
  2303. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
  2304. ExtendToInt64(Res, dl),
  2305. getI32Imm(ANDIMask, dl)),
  2306. 0);
  2307. if (ANDISMask != 0)
  2308. ANDISVal =
  2309. SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
  2310. ExtendToInt64(Res, dl),
  2311. getI32Imm(ANDISMask, dl)),
  2312. 0);
  2313. if (!ANDIVal)
  2314. Res = ANDISVal;
  2315. else if (!ANDISVal)
  2316. Res = ANDIVal;
  2317. else
  2318. Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  2319. ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
  2320. } else {
  2321. unsigned NumOfSelectInsts = 0;
  2322. SDValue MaskVal =
  2323. SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
  2324. Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
  2325. ExtendToInt64(Res, dl), MaskVal),
  2326. 0);
  2327. if (InstCnt)
  2328. *InstCnt += NumOfSelectInsts + /* and */ 1;
  2329. }
  2330. }
  2331. return Res.getNode();
  2332. }
  2333. SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
  2334. // Fill in BitGroups.
  2335. collectBitGroups(LateMask);
  2336. if (BitGroups.empty())
  2337. return nullptr;
  2338. // For 64-bit values, figure out when we can use 32-bit instructions.
  2339. if (Bits.size() == 64)
  2340. assignRepl32BitGroups();
  2341. // Fill in ValueRotsVec.
  2342. collectValueRotInfo();
  2343. if (Bits.size() == 32) {
  2344. return Select32(N, LateMask, InstCnt);
  2345. } else {
  2346. assert(Bits.size() == 64 && "Not 64 bits here?");
  2347. return Select64(N, LateMask, InstCnt);
  2348. }
  2349. return nullptr;
  2350. }
  2351. void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
  2352. erase_if(BitGroups, F);
  2353. }
  2354. SmallVector<ValueBit, 64> Bits;
  2355. bool NeedMask = false;
  2356. SmallVector<unsigned, 64> RLAmt;
  2357. SmallVector<BitGroup, 16> BitGroups;
  2358. DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
  2359. SmallVector<ValueRotInfo, 16> ValueRotsVec;
  2360. SelectionDAG *CurDAG = nullptr;
  2361. public:
  2362. BitPermutationSelector(SelectionDAG *DAG)
  2363. : CurDAG(DAG) {}
  2364. // Here we try to match complex bit permutations into a set of
  2365. // rotate-and-shift/shift/and/or instructions, using a set of heuristics
  2366. // known to produce optimal code for common cases (like i32 byte swapping).
  2367. SDNode *Select(SDNode *N) {
  2368. Memoizer.clear();
  2369. auto Result =
  2370. getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
  2371. if (!Result.first)
  2372. return nullptr;
  2373. Bits = std::move(*Result.second);
  2374. LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
  2375. " selection for: ");
  2376. LLVM_DEBUG(N->dump(CurDAG));
2377. // Fill in RLAmt and set NeedMask.
  2378. computeRotationAmounts();
  2379. if (!NeedMask)
  2380. return Select(N, false);
  2381. // We currently have two techniques for handling results with zeros: early
  2382. // masking (the default) and late masking. Late masking is sometimes more
  2383. // efficient, but because the structure of the bit groups is different, it
  2384. // is hard to tell without generating both and comparing the results. With
  2385. // late masking, we ignore zeros in the resulting value when inserting each
  2386. // set of bit groups, and then mask in the zeros at the end. With early
  2387. // masking, we only insert the non-zero parts of the result at every step.
  2388. unsigned InstCnt = 0, InstCntLateMask = 0;
  2389. LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
  2390. SDNode *RN = Select(N, false, &InstCnt);
  2391. LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
  2392. LLVM_DEBUG(dbgs() << "\tLate masking:\n");
  2393. SDNode *RNLM = Select(N, true, &InstCntLateMask);
  2394. LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
  2395. << " instructions\n");
  2396. if (InstCnt <= InstCntLateMask) {
  2397. LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
  2398. return RN;
  2399. }
  2400. LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
  2401. return RNLM;
  2402. }
  2403. };
  2404. class IntegerCompareEliminator {
  2405. SelectionDAG *CurDAG;
  2406. PPCDAGToDAGISel *S;
  2407. // Conversion type for interpreting results of a 32-bit instruction as
  2408. // a 64-bit value or vice versa.
  2409. enum ExtOrTruncConversion { Ext, Trunc };
  2410. // Modifiers to guide how an ISD::SETCC node's result is to be computed
  2411. // in a GPR.
  2412. // ZExtOrig - use the original condition code, zero-extend value
  2413. // ZExtInvert - invert the condition code, zero-extend value
  2414. // SExtOrig - use the original condition code, sign-extend value
  2415. // SExtInvert - invert the condition code, sign-extend value
  2416. enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
  2417. // Comparisons against zero to emit GPR code sequences for. Each of these
  2418. // sequences may need to be emitted for two or more equivalent patterns.
  2419. // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
  2420. // matters as well as the extension type: sext (-1/0), zext (1/0).
  2421. // GEZExt - (zext (LHS >= 0))
  2422. // GESExt - (sext (LHS >= 0))
  2423. // LEZExt - (zext (LHS <= 0))
  2424. // LESExt - (sext (LHS <= 0))
  2425. enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
  2426. SDNode *tryEXTEND(SDNode *N);
  2427. SDNode *tryLogicOpOfCompares(SDNode *N);
  2428. SDValue computeLogicOpInGPR(SDValue LogicOp);
  2429. SDValue signExtendInputIfNeeded(SDValue Input);
  2430. SDValue zeroExtendInputIfNeeded(SDValue Input);
  2431. SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
  2432. SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
  2433. ZeroCompare CmpTy);
  2434. SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2435. int64_t RHSValue, SDLoc dl);
  2436. SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2437. int64_t RHSValue, SDLoc dl);
  2438. SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2439. int64_t RHSValue, SDLoc dl);
  2440. SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2441. int64_t RHSValue, SDLoc dl);
  2442. SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
  2443. public:
  2444. IntegerCompareEliminator(SelectionDAG *DAG,
  2445. PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
  2446. assert(CurDAG->getTargetLoweringInfo()
  2447. .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
  2448. "Only expecting to use this on 64 bit targets.");
  2449. }
  2450. SDNode *Select(SDNode *N) {
  2451. if (CmpInGPR == ICGPR_None)
  2452. return nullptr;
  2453. switch (N->getOpcode()) {
  2454. default: break;
  2455. case ISD::ZERO_EXTEND:
  2456. if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
  2457. CmpInGPR == ICGPR_SextI64)
  2458. return nullptr;
  2459. [[fallthrough]];
  2460. case ISD::SIGN_EXTEND:
  2461. if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
  2462. CmpInGPR == ICGPR_ZextI64)
  2463. return nullptr;
  2464. return tryEXTEND(N);
  2465. case ISD::AND:
  2466. case ISD::OR:
  2467. case ISD::XOR:
  2468. return tryLogicOpOfCompares(N);
  2469. }
  2470. return nullptr;
  2471. }
  2472. };
  2473. static bool isLogicOp(unsigned Opc) {
  2474. return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
  2475. }
  2476. // The obvious case for wanting to keep the value in a GPR. Namely, the
  2477. // result of the comparison is actually needed in a GPR.
  2478. SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
  2479. assert((N->getOpcode() == ISD::ZERO_EXTEND ||
  2480. N->getOpcode() == ISD::SIGN_EXTEND) &&
  2481. "Expecting a zero/sign extend node!");
  2482. SDValue WideRes;
  2483. // If we are zero-extending the result of a logical operation on i1
  2484. // values, we can keep the values in GPRs.
  2485. if (isLogicOp(N->getOperand(0).getOpcode()) &&
  2486. N->getOperand(0).getValueType() == MVT::i1 &&
  2487. N->getOpcode() == ISD::ZERO_EXTEND)
  2488. WideRes = computeLogicOpInGPR(N->getOperand(0));
  2489. else if (N->getOperand(0).getOpcode() != ISD::SETCC)
  2490. return nullptr;
  2491. else
  2492. WideRes =
  2493. getSETCCInGPR(N->getOperand(0),
  2494. N->getOpcode() == ISD::SIGN_EXTEND ?
  2495. SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
  2496. if (!WideRes)
  2497. return nullptr;
  2498. SDLoc dl(N);
  2499. bool Input32Bit = WideRes.getValueType() == MVT::i32;
  2500. bool Output32Bit = N->getValueType(0) == MVT::i32;
  2501. NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
  2502. NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
  2503. SDValue ConvOp = WideRes;
  2504. if (Input32Bit != Output32Bit)
  2505. ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
  2506. ExtOrTruncConversion::Trunc);
  2507. return ConvOp.getNode();
  2508. }
  2509. // Attempt to perform logical operations on the results of comparisons while
  2510. // keeping the values in GPRs. Without doing so, these would end up being
  2511. // lowered to CR-logical operations which suffer from significant latency and
  2512. // low ILP.
  2513. SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
  2514. if (N->getValueType(0) != MVT::i1)
  2515. return nullptr;
  2516. assert(isLogicOp(N->getOpcode()) &&
  2517. "Expected a logic operation on setcc results.");
  2518. SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
  2519. if (!LoweredLogical)
  2520. return nullptr;
  2521. SDLoc dl(N);
  2522. bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
  2523. unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
  2524. SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  2525. SDValue LHS = LoweredLogical.getOperand(0);
  2526. SDValue RHS = LoweredLogical.getOperand(1);
  2527. SDValue WideOp;
  2528. SDValue OpToConvToRecForm;
  2529. // Look through any 32-bit to 64-bit implicit extend nodes to find the
  2530. // opcode that is input to the XORI.
  2531. if (IsBitwiseNegate &&
  2532. LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
  2533. OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
  2534. else if (IsBitwiseNegate)
  2535. // If the input to the XORI isn't an extension, that's what we're after.
  2536. OpToConvToRecForm = LoweredLogical.getOperand(0);
  2537. else
  2538. // If this is not an XORI, it is a reg-reg logical op and we can convert
  2539. // it to record-form.
  2540. OpToConvToRecForm = LoweredLogical;
  2541. // Get the record-form version of the node we're looking to use to get the
  2542. // CR result from.
  2543. uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
  2544. int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
2545. // Convert the right node to record-form. This is either the logical operation
2546. // we're looking at or the input node to the negation (if we're looking at
2547. // a bitwise negation).
  2548. if (NewOpc != -1 && IsBitwiseNegate) {
  2549. // The input to the XORI has a record-form. Use it.
  2550. assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
  2551. "Expected a PPC::XORI8 only for bitwise negation.");
  2552. // Emit the record-form instruction.
  2553. std::vector<SDValue> Ops;
  2554. for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
  2555. Ops.push_back(OpToConvToRecForm.getOperand(i));
  2556. WideOp =
  2557. SDValue(CurDAG->getMachineNode(NewOpc, dl,
  2558. OpToConvToRecForm.getValueType(),
  2559. MVT::Glue, Ops), 0);
  2560. } else {
  2561. assert((NewOpc != -1 || !IsBitwiseNegate) &&
  2562. "No record form available for AND8/OR8/XOR8?");
  2563. WideOp =
  2564. SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
  2565. dl, MVT::i64, MVT::Glue, LHS, RHS),
  2566. 0);
  2567. }
  2568. // Select this node to a single bit from CR0 set by the record-form node
  2569. // just created. For bitwise negation, use the EQ bit which is the equivalent
  2570. // of negating the result (i.e. it is a bit set when the result of the
  2571. // operation is zero).
  2572. SDValue SRIdxVal =
  2573. CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
  2574. SDValue CRBit =
  2575. SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
  2576. MVT::i1, CR0Reg, SRIdxVal,
  2577. WideOp.getValue(1)), 0);
  2578. return CRBit.getNode();
  2579. }
  2580. // Lower a logical operation on i1 values into a GPR sequence if possible.
  2581. // The result can be kept in a GPR if requested.
  2582. // Three types of inputs can be handled:
  2583. // - SETCC
  2584. // - TRUNCATE
  2585. // - Logical operation (AND/OR/XOR)
  2586. // There is also a special case that is handled (namely a complement operation
  2587. // achieved with xor %a, -1).
  2588. SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
  2589. assert(isLogicOp(LogicOp.getOpcode()) &&
  2590. "Can only handle logic operations here.");
  2591. assert(LogicOp.getValueType() == MVT::i1 &&
  2592. "Can only handle logic operations on i1 values here.");
  2593. SDLoc dl(LogicOp);
  2594. SDValue LHS, RHS;
  2595. // Special case: xor %a, -1
  2596. bool IsBitwiseNegation = isBitwiseNot(LogicOp);
  2597. // Produces a GPR sequence for each operand of the binary logic operation.
  2598. // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
  2599. // the value in a GPR and for logic operations, it will recursively produce
  2600. // a GPR sequence for the operation.
  2601. auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
  2602. unsigned OperandOpcode = Operand.getOpcode();
  2603. if (OperandOpcode == ISD::SETCC)
  2604. return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
  2605. else if (OperandOpcode == ISD::TRUNCATE) {
  2606. SDValue InputOp = Operand.getOperand(0);
  2607. EVT InVT = InputOp.getValueType();
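// Keep only bit 0 of the value being truncated to i1: rldicl with a shift of
// 0 and a mask begin of 63 clears the upper 63 bits.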
  2608. return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
  2609. PPC::RLDICL, dl, InVT, InputOp,
  2610. S->getI64Imm(0, dl),
  2611. S->getI64Imm(63, dl)), 0);
  2612. } else if (isLogicOp(OperandOpcode))
  2613. return computeLogicOpInGPR(Operand);
  2614. return SDValue();
  2615. };
  2616. LHS = getLogicOperand(LogicOp.getOperand(0));
  2617. RHS = getLogicOperand(LogicOp.getOperand(1));
  2618. // If a GPR sequence can't be produced for the LHS we can't proceed.
  2619. // Not producing a GPR sequence for the RHS is only a problem if this isn't
  2620. // a bitwise negation operation.
  2621. if (!LHS || (!RHS && !IsBitwiseNegation))
  2622. return SDValue();
  2623. NumLogicOpsOnComparison++;
  2624. // We will use the inputs as 64-bit values.
  2625. if (LHS.getValueType() == MVT::i32)
  2626. LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
  2627. if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
  2628. RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
  2629. unsigned NewOpc;
  2630. switch (LogicOp.getOpcode()) {
  2631. default: llvm_unreachable("Unknown logic operation.");
  2632. case ISD::AND: NewOpc = PPC::AND8; break;
  2633. case ISD::OR: NewOpc = PPC::OR8; break;
  2634. case ISD::XOR: NewOpc = PPC::XOR8; break;
  2635. }
  2636. if (IsBitwiseNegation) {
  2637. RHS = S->getI64Imm(1, dl);
  2638. NewOpc = PPC::XORI8;
  2639. }
  2640. return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
  2641. }
  2642. /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
  2643. /// Otherwise just reinterpret it as a 64-bit value.
  2644. /// Useful when emitting comparison code for 32-bit values without using
  2645. /// the compare instruction (which only considers the lower 32-bits).
  2646. SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
  2647. assert(Input.getValueType() == MVT::i32 &&
  2648. "Can only sign-extend 32-bit values here.");
  2649. unsigned Opc = Input.getOpcode();
  2650. // The value was sign extended and then truncated to 32-bits. No need to
  2651. // sign extend it again.
  2652. if (Opc == ISD::TRUNCATE &&
  2653. (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
  2654. Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
  2655. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2656. LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2657. // The input is a sign-extending load. All PPC sign-extending loads
2658. // sign-extend to the full 64 bits.
  2659. if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
  2660. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2661. ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  2662. // We don't sign-extend constants.
  2663. if (InputConst)
  2664. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2665. SDLoc dl(Input);
  2666. SignExtensionsAdded++;
  2667. return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
  2668. MVT::i64, Input), 0);
  2669. }
  2670. /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
  2671. /// Otherwise just reinterpret it as a 64-bit value.
  2672. /// Useful when emitting comparison code for 32-bit values without using
  2673. /// the compare instruction (which only considers the lower 32-bits).
  2674. SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
  2675. assert(Input.getValueType() == MVT::i32 &&
  2676. "Can only zero-extend 32-bit values here.");
  2677. unsigned Opc = Input.getOpcode();
2678. // The only conditions under which we can omit the actual extend instruction:
  2679. // - The value is a positive constant
  2680. // - The value comes from a load that isn't a sign-extending load
  2681. // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
  2682. bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
  2683. (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
  2684. Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
  2685. if (IsTruncateOfZExt)
  2686. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2687. ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  2688. if (InputConst && InputConst->getSExtValue() >= 0)
  2689. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2690. LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  2691. // The input is a load that doesn't sign-extend (it will be zero-extended).
  2692. if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
  2693. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2694. // None of the above, need to zero-extend.
  2695. SDLoc dl(Input);
  2696. ZeroExtensionsAdded++;
  2697. return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
  2698. S->getI64Imm(0, dl),
  2699. S->getI64Imm(32, dl)), 0);
  2700. }
2701. // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2702. // course not actual zero/sign extensions that will generate machine code;
2703. // they're just a way to reinterpret a 32-bit value in a register as a
2704. // 64-bit value and vice-versa.
  2705. SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
  2706. ExtOrTruncConversion Conv) {
  2707. SDLoc dl(NatWidthRes);
  2708. // For reinterpreting 32-bit values as 64 bit values, we generate
  2709. // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
  2710. if (Conv == ExtOrTruncConversion::Ext) {
  2711. SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
  2712. SDValue SubRegIdx =
  2713. CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  2714. return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
  2715. ImDef, NatWidthRes, SubRegIdx), 0);
  2716. }
  2717. assert(Conv == ExtOrTruncConversion::Trunc &&
  2718. "Unknown convertion between 32 and 64 bit values.");
  2719. // For reinterpreting 64-bit values as 32-bit values, we just need to
  2720. // EXTRACT_SUBREG (i.e. extract the low word).
  2721. SDValue SubRegIdx =
  2722. CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  2723. return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
  2724. NatWidthRes, SubRegIdx), 0);
  2725. }
  2726. // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
  2727. // Handle both zero-extensions and sign-extensions.
  2728. SDValue
  2729. IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
  2730. ZeroCompare CmpTy) {
  2731. EVT InVT = LHS.getValueType();
  2732. bool Is32Bit = InVT == MVT::i32;
  2733. SDValue ToExtend;
  2734. // Produce the value that needs to be either zero or sign extended.
  2735. switch (CmpTy) {
  2736. case ZeroCompare::GEZExt:
  2737. case ZeroCompare::GESExt:
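// The sign bit of ~LHS is set exactly when LHS >= 0; the extension code
// below shifts it into place (zext) or propagates it (sext).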
  2738. ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
  2739. dl, InVT, LHS, LHS), 0);
  2740. break;
  2741. case ZeroCompare::LEZExt:
  2742. case ZeroCompare::LESExt: {
  2743. if (Is32Bit) {
  2744. // Upper 32 bits cannot be undefined for this sequence.
  2745. LHS = signExtendInputIfNeeded(LHS);
  2746. SDValue Neg =
  2747. SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
  2748. ToExtend =
  2749. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2750. Neg, S->getI64Imm(1, dl),
  2751. S->getI64Imm(63, dl)), 0);
  2752. } else {
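// For 64-bit inputs, ((a - 1) | a) has its sign bit set exactly when a <= 0
// (either a is negative, or a is zero and a - 1 wraps to -1); the common
// extension code below then extracts or propagates that sign bit.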
  2753. SDValue Addi =
  2754. SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
  2755. S->getI64Imm(~0ULL, dl)), 0);
  2756. ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  2757. Addi, LHS), 0);
  2758. }
  2759. break;
  2760. }
  2761. }
  2762. // For 64-bit sequences, the extensions are the same for the GE/LE cases.
  2763. if (!Is32Bit &&
  2764. (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
  2765. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2766. ToExtend, S->getI64Imm(1, dl),
  2767. S->getI64Imm(63, dl)), 0);
  2768. if (!Is32Bit &&
  2769. (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
  2770. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
  2771. S->getI64Imm(63, dl)), 0);
  2772. assert(Is32Bit && "Should have handled the 32-bit sequences above.");
  2773. // For 32-bit sequences, the extensions differ between GE/LE cases.
  2774. switch (CmpTy) {
  2775. case ZeroCompare::GEZExt: {
  2776. SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
  2777. S->getI32Imm(31, dl) };
  2778. return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  2779. ShiftOps), 0);
  2780. }
  2781. case ZeroCompare::GESExt:
  2782. return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
  2783. S->getI32Imm(31, dl)), 0);
  2784. case ZeroCompare::LEZExt:
  2785. return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
  2786. S->getI32Imm(1, dl)), 0);
  2787. case ZeroCompare::LESExt:
  2788. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
  2789. S->getI32Imm(-1, dl)), 0);
  2790. }
2791. // The switch above covers all the enumerators, so it has no default clause
2792. // (adding one would trigger compiler warnings).
  2793. llvm_unreachable("Unknown zero-comparison type.");
  2794. }
  2795. /// Produces a zero-extended result of comparing two 32-bit values according to
  2796. /// the passed condition code.
  2797. SDValue
  2798. IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
  2799. ISD::CondCode CC,
  2800. int64_t RHSValue, SDLoc dl) {
  2801. if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
  2802. CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
  2803. return SDValue();
  2804. bool IsRHSZero = RHSValue == 0;
  2805. bool IsRHSOne = RHSValue == 1;
  2806. bool IsRHSNegOne = RHSValue == -1LL;
  2807. switch (CC) {
  2808. default: return SDValue();
  2809. case ISD::SETEQ: {
  2810. // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
  2811. // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
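// cntlzw returns 32 (0b100000) only when its input is zero, so shifting the
// count right by 5 yields 1 exactly when %a == %b and 0 otherwise.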
  2812. SDValue Xor = IsRHSZero ? LHS :
  2813. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  2814. SDValue Clz =
  2815. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
  2816. SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
  2817. S->getI32Imm(31, dl) };
  2818. return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  2819. ShiftOps), 0);
  2820. }
  2821. case ISD::SETNE: {
  2822. // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
  2823. // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
  2824. SDValue Xor = IsRHSZero ? LHS :
  2825. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  2826. SDValue Clz =
  2827. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
  2828. SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
  2829. S->getI32Imm(31, dl) };
  2830. SDValue Shift =
  2831. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
  2832. return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
  2833. S->getI32Imm(1, dl)), 0);
  2834. }
  2835. case ISD::SETGE: {
  2836. // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
  2837. // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
  2838. if(IsRHSZero)
  2839. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  2840. // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
  2841. // by swapping inputs and falling through.
  2842. std::swap(LHS, RHS);
  2843. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  2844. IsRHSZero = RHSConst && RHSConst->isZero();
  2845. [[fallthrough]];
  2846. }
  2847. case ISD::SETLE: {
  2848. if (CmpInGPR == ICGPR_NonExtIn)
  2849. return SDValue();
  2850. // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
  2851. // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
  2852. if(IsRHSZero) {
  2853. if (CmpInGPR == ICGPR_NonExtIn)
  2854. return SDValue();
  2855. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  2856. }
  2857. // The upper 32-bits of the register can't be undefined for this sequence.
  2858. LHS = signExtendInputIfNeeded(LHS);
  2859. RHS = signExtendInputIfNeeded(RHS);
  2860. SDValue Sub =
  2861. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
  2862. SDValue Shift =
  2863. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
  2864. S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
  2865. 0);
  2866. return
  2867. SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
  2868. MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
  2869. }
  2870. case ISD::SETGT: {
  2871. // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
  2872. // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
  2873. // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
2874. // Handle SETGT -1 (which is equivalent to SETGE 0).
  2875. if (IsRHSNegOne)
  2876. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  2877. if (IsRHSZero) {
  2878. if (CmpInGPR == ICGPR_NonExtIn)
  2879. return SDValue();
  2880. // The upper 32-bits of the register can't be undefined for this sequence.
  2881. LHS = signExtendInputIfNeeded(LHS);
  2882. RHS = signExtendInputIfNeeded(RHS);
  2883. SDValue Neg =
  2884. SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
  2885. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2886. Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
  2887. }
  2888. // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
  2889. // (%b < %a) by swapping inputs and falling through.
  2890. std::swap(LHS, RHS);
  2891. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  2892. IsRHSZero = RHSConst && RHSConst->isZero();
  2893. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  2894. [[fallthrough]];
  2895. }
  2896. case ISD::SETLT: {
  2897. // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
  2898. // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
  2899. // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
  2900. // Handle SETLT 1 (which is equivalent to SETLE 0).
  2901. if (IsRHSOne) {
  2902. if (CmpInGPR == ICGPR_NonExtIn)
  2903. return SDValue();
  2904. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  2905. }
  2906. if (IsRHSZero) {
  2907. SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
  2908. S->getI32Imm(31, dl) };
  2909. return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  2910. ShiftOps), 0);
  2911. }
  2912. if (CmpInGPR == ICGPR_NonExtIn)
  2913. return SDValue();
  2914. // The upper 32-bits of the register can't be undefined for this sequence.
  2915. LHS = signExtendInputIfNeeded(LHS);
  2916. RHS = signExtendInputIfNeeded(RHS);
  2917. SDValue SUBFNode =
  2918. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  2919. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2920. SUBFNode, S->getI64Imm(1, dl),
  2921. S->getI64Imm(63, dl)), 0);
  2922. }
  2923. case ISD::SETUGE:
  2924. // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
  2925. // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
  2926. std::swap(LHS, RHS);
  2927. [[fallthrough]];
  2928. case ISD::SETULE: {
  2929. if (CmpInGPR == ICGPR_NonExtIn)
  2930. return SDValue();
  2931. // The upper 32-bits of the register can't be undefined for this sequence.
  2932. LHS = zeroExtendInputIfNeeded(LHS);
  2933. RHS = zeroExtendInputIfNeeded(RHS);
  2934. SDValue Subtract =
  2935. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
  2936. SDValue SrdiNode =
  2937. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2938. Subtract, S->getI64Imm(1, dl),
  2939. S->getI64Imm(63, dl)), 0);
  2940. return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
  2941. S->getI32Imm(1, dl)), 0);
  2942. }
  2943. case ISD::SETUGT:
  2944. // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
  2945. // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
  2946. std::swap(LHS, RHS);
  2947. [[fallthrough]];
  2948. case ISD::SETULT: {
  2949. if (CmpInGPR == ICGPR_NonExtIn)
  2950. return SDValue();
  2951. // The upper 32-bits of the register can't be undefined for this sequence.
  2952. LHS = zeroExtendInputIfNeeded(LHS);
  2953. RHS = zeroExtendInputIfNeeded(RHS);
  2954. SDValue Subtract =
  2955. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  2956. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2957. Subtract, S->getI64Imm(1, dl),
  2958. S->getI64Imm(63, dl)), 0);
  2959. }
  2960. }
  2961. }
  2962. /// Produces a sign-extended result of comparing two 32-bit values according to
  2963. /// the passed condition code.
  2964. SDValue
  2965. IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
  2966. ISD::CondCode CC,
  2967. int64_t RHSValue, SDLoc dl) {
  2968. if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
  2969. CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
  2970. return SDValue();
  2971. bool IsRHSZero = RHSValue == 0;
  2972. bool IsRHSOne = RHSValue == 1;
  2973. bool IsRHSNegOne = RHSValue == -1LL;
  2974. switch (CC) {
  2975. default: return SDValue();
  2976. case ISD::SETEQ: {
  2977. // (sext (setcc %a, %b, seteq)) ->
  2978. // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
  2979. // (sext (setcc %a, 0, seteq)) ->
  2980. // (ashr (shl (ctlz %a), 58), 63)
  2981. SDValue CountInput = IsRHSZero ? LHS :
  2982. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  2983. SDValue Cntlzw =
  2984. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
  2985. SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
  2986. S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
  2987. SDValue Slwi =
  2988. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
  2989. return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
  2990. }
  2991. case ISD::SETNE: {
  2992. // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
  2993. // flip the bit, finally take 2's complement.
  2994. // (sext (setcc %a, %b, setne)) ->
  2995. // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
  2996. // Same as above, but the first xor is not needed.
  2997. // (sext (setcc %a, 0, setne)) ->
  2998. // (neg (xor (lshr (ctlz %a), 5), 1))
  2999. SDValue Xor = IsRHSZero ? LHS :
  3000. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  3001. SDValue Clz =
  3002. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
  3003. SDValue ShiftOps[] =
  3004. { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
  3005. SDValue Shift =
  3006. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
  3007. SDValue Xori =
  3008. SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
  3009. S->getI32Imm(1, dl)), 0);
  3010. return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
  3011. }
  3012. case ISD::SETGE: {
  3013. // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
  3014. // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
  3015. if (IsRHSZero)
  3016. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  3017. // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
  3018. // by swapping inputs and falling through.
  3019. std::swap(LHS, RHS);
  3020. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3021. IsRHSZero = RHSConst && RHSConst->isZero();
  3022. [[fallthrough]];
  3023. }
  3024. case ISD::SETLE: {
  3025. if (CmpInGPR == ICGPR_NonExtIn)
  3026. return SDValue();
3027. // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
  3028. // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
  3029. if (IsRHSZero)
  3030. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  3031. // The upper 32-bits of the register can't be undefined for this sequence.
  3032. LHS = signExtendInputIfNeeded(LHS);
  3033. RHS = signExtendInputIfNeeded(RHS);
  3034. SDValue SUBFNode =
  3035. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
  3036. LHS, RHS), 0);
  3037. SDValue Srdi =
  3038. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  3039. SUBFNode, S->getI64Imm(1, dl),
  3040. S->getI64Imm(63, dl)), 0);
  3041. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
  3042. S->getI32Imm(-1, dl)), 0);
  3043. }
  3044. case ISD::SETGT: {
  3045. // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
  3046. // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
  3047. // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
  3048. if (IsRHSNegOne)
  3049. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  3050. if (IsRHSZero) {
  3051. if (CmpInGPR == ICGPR_NonExtIn)
  3052. return SDValue();
  3053. // The upper 32-bits of the register can't be undefined for this sequence.
  3054. LHS = signExtendInputIfNeeded(LHS);
  3055. RHS = signExtendInputIfNeeded(RHS);
  3056. SDValue Neg =
  3057. SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
  3058. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
  3059. S->getI64Imm(63, dl)), 0);
  3060. }
3061. // Not a special case (i.e. RHS is neither 0 nor -1). Handle (%a > %b) as
  3062. // (%b < %a) by swapping inputs and falling through.
  3063. std::swap(LHS, RHS);
  3064. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3065. IsRHSZero = RHSConst && RHSConst->isZero();
  3066. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  3067. [[fallthrough]];
  3068. }
  3069. case ISD::SETLT: {
3070. // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3071. // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
3072. // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
  3073. if (IsRHSOne) {
  3074. if (CmpInGPR == ICGPR_NonExtIn)
  3075. return SDValue();
  3076. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  3077. }
  3078. if (IsRHSZero)
  3079. return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
  3080. S->getI32Imm(31, dl)), 0);
  3081. if (CmpInGPR == ICGPR_NonExtIn)
  3082. return SDValue();
  3083. // The upper 32-bits of the register can't be undefined for this sequence.
  3084. LHS = signExtendInputIfNeeded(LHS);
  3085. RHS = signExtendInputIfNeeded(RHS);
  3086. SDValue SUBFNode =
  3087. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  3088. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  3089. SUBFNode, S->getI64Imm(63, dl)), 0);
  3090. }
  3091. case ISD::SETUGE:
  3092. // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
  3093. // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
  3094. std::swap(LHS, RHS);
  3095. [[fallthrough]];
  3096. case ISD::SETULE: {
  3097. if (CmpInGPR == ICGPR_NonExtIn)
  3098. return SDValue();
  3099. // The upper 32-bits of the register can't be undefined for this sequence.
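// Zero-extending both 32-bit inputs makes the unsigned compare equivalent to
// a signed 64-bit compare, so bit 63 of the 64-bit difference is the borrow
// of the original comparison.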
  3100. LHS = zeroExtendInputIfNeeded(LHS);
  3101. RHS = zeroExtendInputIfNeeded(RHS);
  3102. SDValue Subtract =
  3103. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
  3104. SDValue Shift =
  3105. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
  3106. S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
  3107. 0);
  3108. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
  3109. S->getI32Imm(-1, dl)), 0);
  3110. }
  3111. case ISD::SETUGT:
  3112. // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3113. // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
  3114. std::swap(LHS, RHS);
  3115. [[fallthrough]];
  3116. case ISD::SETULT: {
  3117. if (CmpInGPR == ICGPR_NonExtIn)
  3118. return SDValue();
  3119. // The upper 32-bits of the register can't be undefined for this sequence.
  3120. LHS = zeroExtendInputIfNeeded(LHS);
  3121. RHS = zeroExtendInputIfNeeded(RHS);
  3122. SDValue Subtract =
  3123. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  3124. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  3125. Subtract, S->getI64Imm(63, dl)), 0);
  3126. }
  3127. }
  3128. }
  3129. /// Produces a zero-extended result of comparing two 64-bit values according to
  3130. /// the passed condition code.
  3131. SDValue
  3132. IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
  3133. ISD::CondCode CC,
  3134. int64_t RHSValue, SDLoc dl) {
  3135. if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
  3136. CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
  3137. return SDValue();
  3138. bool IsRHSZero = RHSValue == 0;
  3139. bool IsRHSOne = RHSValue == 1;
  3140. bool IsRHSNegOne = RHSValue == -1LL;
  3141. switch (CC) {
  3142. default: return SDValue();
  3143. case ISD::SETEQ: {
  3144. // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
  3145. // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
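// cntlzd yields 64 exactly when the xor is zero, so shifting the count right
// by 6 (the rldicl below) leaves a 0/1 'equal' flag.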
  3146. SDValue Xor = IsRHSZero ? LHS :
  3147. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  3148. SDValue Clz =
  3149. SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
  3150. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
  3151. S->getI64Imm(58, dl),
  3152. S->getI64Imm(63, dl)), 0);
  3153. }
  3154. case ISD::SETNE: {
  3155. // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
  3156. // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
  3157. // {addcz.reg, addcz.CA} = (addcarry %a, -1)
  3158. // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
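// ADDIC8 with -1 produces a carry exactly when its input is non-zero, and the
// SUBFE8 (~(x - 1) + x + CA) collapses to just CA, materializing that carry
// bit as the 0/1 result.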
  3159. SDValue Xor = IsRHSZero ? LHS :
  3160. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  3161. SDValue AC =
  3162. SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
  3163. Xor, S->getI32Imm(~0U, dl)), 0);
  3164. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
  3165. Xor, AC.getValue(1)), 0);
  3166. }
  3167. case ISD::SETGE: {
  3168. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3169. // (zext (setcc %a, %b, setge)) ->
  3170. // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
  3171. // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
  3172. if (IsRHSZero)
  3173. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  3174. std::swap(LHS, RHS);
  3175. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3176. IsRHSZero = RHSConst && RHSConst->isZero();
  3177. [[fallthrough]];
  3178. }
  3179. case ISD::SETLE: {
  3180. // {subc.reg, subc.CA} = (subcarry %b, %a)
3181. // (zext (setcc %a, %b, setle)) ->
3182. // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3183. // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
  3184. if (IsRHSZero)
  3185. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  3186. SDValue ShiftL =
  3187. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
  3188. S->getI64Imm(1, dl),
  3189. S->getI64Imm(63, dl)), 0);
  3190. SDValue ShiftR =
  3191. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
  3192. S->getI64Imm(63, dl)), 0);
  3193. SDValue SubtractCarry =
  3194. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3195. LHS, RHS), 1);
  3196. return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
  3197. ShiftR, ShiftL, SubtractCarry), 0);
  3198. }
  3199. case ISD::SETGT: {
  3200. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3201. // (zext (setcc %a, %b, setgt)) ->
  3202. // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
  3203. // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
  3204. if (IsRHSNegOne)
  3205. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  3206. if (IsRHSZero) {
  3207. SDValue Addi =
  3208. SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
  3209. S->getI64Imm(~0ULL, dl)), 0);
  3210. SDValue Nor =
  3211. SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
  3212. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
  3213. S->getI64Imm(1, dl),
  3214. S->getI64Imm(63, dl)), 0);
  3215. }
  3216. std::swap(LHS, RHS);
  3217. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3218. IsRHSZero = RHSConst && RHSConst->isZero();
  3219. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  3220. [[fallthrough]];
  3221. }
  3222. case ISD::SETLT: {
  3223. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3224. // (zext (setcc %a, %b, setlt)) ->
  3225. // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
  3226. // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
  3227. if (IsRHSOne)
  3228. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  3229. if (IsRHSZero)
  3230. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
  3231. S->getI64Imm(1, dl),
  3232. S->getI64Imm(63, dl)), 0);
  3233. SDValue SRADINode =
  3234. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  3235. LHS, S->getI64Imm(63, dl)), 0);
  3236. SDValue SRDINode =
  3237. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  3238. RHS, S->getI64Imm(1, dl),
  3239. S->getI64Imm(63, dl)), 0);
  3240. SDValue SUBFC8Carry =
  3241. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3242. RHS, LHS), 1);
  3243. SDValue ADDE8Node =
  3244. SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
  3245. SRDINode, SRADINode, SUBFC8Carry), 0);
  3246. return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
  3247. ADDE8Node, S->getI64Imm(1, dl)), 0);
  3248. }
  3249. case ISD::SETUGE:
  3250. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3251. // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
  3252. std::swap(LHS, RHS);
  3253. [[fallthrough]];
  3254. case ISD::SETULE: {
  3255. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3256. // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
  3257. SDValue SUBFC8Carry =
  3258. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3259. LHS, RHS), 1);
  3260. SDValue SUBFE8Node =
  3261. SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
  3262. LHS, LHS, SUBFC8Carry), 0);
  3263. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
  3264. SUBFE8Node, S->getI64Imm(1, dl)), 0);
  3265. }
  3266. case ISD::SETUGT:
  3267. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3268. // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
  3269. std::swap(LHS, RHS);
  3270. [[fallthrough]];
  3271. case ISD::SETULT: {
  3272. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3273. // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
  3274. SDValue SubtractCarry =
  3275. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3276. RHS, LHS), 1);
  3277. SDValue ExtSub =
  3278. SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
  3279. LHS, LHS, SubtractCarry), 0);
  3280. return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
  3281. ExtSub), 0);
  3282. }
  3283. }
  3284. }
  3285. /// Produces a sign-extended result of comparing two 64-bit values according to
  3286. /// the passed condition code.
  3287. SDValue
  3288. IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
  3289. ISD::CondCode CC,
  3290. int64_t RHSValue, SDLoc dl) {
  3291. if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
  3292. CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
  3293. return SDValue();
  3294. bool IsRHSZero = RHSValue == 0;
  3295. bool IsRHSOne = RHSValue == 1;
  3296. bool IsRHSNegOne = RHSValue == -1LL;
  3297. switch (CC) {
  3298. default: return SDValue();
  3299. case ISD::SETEQ: {
  3300. // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
  3301. // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
  3302. // {addcz.reg, addcz.CA} = (addcarry %a, -1)
  3303. // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
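// With both SUBFE8 register operands equal to the ADDIC8 result, the subfe
// computes CA - 1: -1 when the operands compared equal (no carry out of the
// addic) and 0 otherwise.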
  3304. SDValue AddInput = IsRHSZero ? LHS :
  3305. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  3306. SDValue Addic =
  3307. SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
  3308. AddInput, S->getI32Imm(~0U, dl)), 0);
  3309. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
  3310. Addic, Addic.getValue(1)), 0);
  3311. }
  3312. case ISD::SETNE: {
  3313. // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
  3314. // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
  3315. // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
  3316. // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
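// SUBFIC8 computes 0 - x and sets CA only when x is zero, so the subfe
// (CA - 1) yields -1 when the operands differ and 0 when they are equal.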
  3317. SDValue Xor = IsRHSZero ? LHS :
  3318. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  3319. SDValue SC =
  3320. SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
  3321. Xor, S->getI32Imm(0, dl)), 0);
  3322. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
  3323. SC, SC.getValue(1)), 0);
  3324. }
  3325. case ISD::SETGE: {
  3326. // {subc.reg, subc.CA} = (subcarry %a, %b)
3327. // (sext (setcc %a, %b, setge)) ->
3328. // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3329. // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
  3330. if (IsRHSZero)
  3331. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  3332. std::swap(LHS, RHS);
  3333. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3334. IsRHSZero = RHSConst && RHSConst->isZero();
  3335. [[fallthrough]];
  3336. }
  3337. case ISD::SETLE: {
  3338. // {subc.reg, subc.CA} = (subcarry %b, %a)
3339. // (sext (setcc %a, %b, setle)) ->
3340. // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3341. // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
  3342. if (IsRHSZero)
  3343. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  3344. SDValue ShiftR =
  3345. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
  3346. S->getI64Imm(63, dl)), 0);
  3347. SDValue ShiftL =
  3348. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
  3349. S->getI64Imm(1, dl),
  3350. S->getI64Imm(63, dl)), 0);
  3351. SDValue SubtractCarry =
  3352. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3353. LHS, RHS), 1);
  3354. SDValue Adde =
  3355. SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
  3356. ShiftR, ShiftL, SubtractCarry), 0);
  3357. return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
  3358. }
  3359. case ISD::SETGT: {
  3360. // {subc.reg, subc.CA} = (subcarry %b, %a)
3361. // (sext (setcc %a, %b, setgt)) ->
3362. // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3363. // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
  3364. if (IsRHSNegOne)
  3365. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  3366. if (IsRHSZero) {
  3367. SDValue Add =
  3368. SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
  3369. S->getI64Imm(-1, dl)), 0);
  3370. SDValue Nor =
  3371. SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
  3372. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
  3373. S->getI64Imm(63, dl)), 0);
  3374. }
  3375. std::swap(LHS, RHS);
  3376. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3377. IsRHSZero = RHSConst && RHSConst->isZero();
  3378. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  3379. [[fallthrough]];
  3380. }
  3381. case ISD::SETLT: {
  3382. // {subc.reg, subc.CA} = (subcarry %a, %b)
3383. // (sext (setcc %a, %b, setlt)) ->
3384. // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3385. // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
  3386. if (IsRHSOne)
  3387. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  3388. if (IsRHSZero) {
  3389. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
  3390. S->getI64Imm(63, dl)), 0);
  3391. }
  3392. SDValue SRADINode =
  3393. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  3394. LHS, S->getI64Imm(63, dl)), 0);
  3395. SDValue SRDINode =
  3396. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  3397. RHS, S->getI64Imm(1, dl),
  3398. S->getI64Imm(63, dl)), 0);
  3399. SDValue SUBFC8Carry =
  3400. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3401. RHS, LHS), 1);
  3402. SDValue ADDE8Node =
  3403. SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
  3404. SRDINode, SRADINode, SUBFC8Carry), 0);
  3405. SDValue XORI8Node =
  3406. SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
  3407. ADDE8Node, S->getI64Imm(1, dl)), 0);
  3408. return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
  3409. XORI8Node), 0);
  3410. }
  3411. case ISD::SETUGE:
  3412. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3413. // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
  3414. std::swap(LHS, RHS);
  3415. [[fallthrough]];
  3416. case ISD::SETULE: {
  3417. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3418. // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
  3419. SDValue SubtractCarry =
  3420. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3421. LHS, RHS), 1);
  3422. SDValue ExtSub =
  3423. SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
  3424. LHS, SubtractCarry), 0);
  3425. return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
  3426. ExtSub, ExtSub), 0);
  3427. }
  3428. case ISD::SETUGT:
  3429. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3430. // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
  3431. std::swap(LHS, RHS);
  3432. [[fallthrough]];
  3433. case ISD::SETULT: {
  3434. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3435. // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
  3436. SDValue SubCarry =
  3437. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3438. RHS, LHS), 1);
  3439. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
  3440. LHS, LHS, SubCarry), 0);
  3441. }
  3442. }
  3443. }
  3444. /// Do all uses of this SDValue need the result in a GPR?
  3445. /// This is meant to be used on values that have type i1 since
  3446. /// it is somewhat meaningless to ask if values of other types
3447. /// should be kept in GPRs.
  3448. static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
  3449. assert(Compare.getOpcode() == ISD::SETCC &&
  3450. "An ISD::SETCC node required here.");
  3451. // For values that have a single use, the caller should obviously already have
  3452. // checked if that use is an extending use. We check the other uses here.
  3453. if (Compare.hasOneUse())
  3454. return true;
  3455. // We want the value in a GPR if it is being extended, used for a select, or
  3456. // used in logical operations.
  3457. for (auto *CompareUse : Compare.getNode()->uses())
  3458. if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
  3459. CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
  3460. CompareUse->getOpcode() != ISD::SELECT &&
  3461. !isLogicOp(CompareUse->getOpcode())) {
  3462. OmittedForNonExtendUses++;
  3463. return false;
  3464. }
  3465. return true;
  3466. }
  3467. /// Returns an equivalent of a SETCC node but with the result the same width as
  3468. /// the inputs. This can also be used for SELECT_CC if either the true or false
3469. /// value is a power of two while the other is zero.
  3470. SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
  3471. SetccInGPROpts ConvOpts) {
  3472. assert((Compare.getOpcode() == ISD::SETCC ||
  3473. Compare.getOpcode() == ISD::SELECT_CC) &&
3474. "An ISD::SETCC or ISD::SELECT_CC node required here.");
  3475. // Don't convert this comparison to a GPR sequence because there are uses
  3476. // of the i1 result (i.e. uses that require the result in the CR).
  3477. if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
  3478. return SDValue();
  3479. SDValue LHS = Compare.getOperand(0);
  3480. SDValue RHS = Compare.getOperand(1);
  3481. // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
  3482. int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
  3483. ISD::CondCode CC =
  3484. cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
  3485. EVT InputVT = LHS.getValueType();
  3486. if (InputVT != MVT::i32 && InputVT != MVT::i64)
  3487. return SDValue();
  3488. if (ConvOpts == SetccInGPROpts::ZExtInvert ||
  3489. ConvOpts == SetccInGPROpts::SExtInvert)
  3490. CC = ISD::getSetCCInverse(CC, InputVT);
  3491. bool Inputs32Bit = InputVT == MVT::i32;
  3492. SDLoc dl(Compare);
  3493. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3494. int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
  3495. bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
  3496. ConvOpts == SetccInGPROpts::SExtInvert;
  3497. if (IsSext && Inputs32Bit)
  3498. return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  3499. else if (Inputs32Bit)
  3500. return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
  3501. else if (IsSext)
  3502. return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  3503. return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
  3504. }
  3505. } // end anonymous namespace
  3506. bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
  3507. if (N->getValueType(0) != MVT::i32 &&
  3508. N->getValueType(0) != MVT::i64)
  3509. return false;
  3510. // This optimization will emit code that assumes 64-bit registers
  3511. // so we don't want to run it in 32-bit mode. Also don't run it
  3512. // on functions that are not to be optimized.
  3513. if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
  3514. return false;
  3515. // For POWER10, it is more profitable to use the set boolean extension
  3516. // instructions rather than the integer compare elimination codegen.
  3517. // Users can override this via the command line option, `--ppc-gpr-icmps`.
  3518. if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
  3519. return false;
  3520. switch (N->getOpcode()) {
  3521. default: break;
  3522. case ISD::ZERO_EXTEND:
  3523. case ISD::SIGN_EXTEND:
  3524. case ISD::AND:
  3525. case ISD::OR:
  3526. case ISD::XOR: {
  3527. IntegerCompareEliminator ICmpElim(CurDAG, this);
  3528. if (SDNode *New = ICmpElim.Select(N)) {
  3529. ReplaceNode(N, New);
  3530. return true;
  3531. }
  3532. }
  3533. }
  3534. return false;
  3535. }
  3536. bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
  3537. if (N->getValueType(0) != MVT::i32 &&
  3538. N->getValueType(0) != MVT::i64)
  3539. return false;
  3540. if (!UseBitPermRewriter)
  3541. return false;
  3542. switch (N->getOpcode()) {
  3543. default: break;
  3544. case ISD::SRL:
  3545. // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
  3546. // uses the BRH instruction.
  3547. if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 &&
  3548. N->getOperand(0).getOpcode() == ISD::BSWAP) {
  3549. auto &OpRight = N->getOperand(1);
  3550. ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight);
  3551. if (SRLConst && SRLConst->getSExtValue() == 16)
  3552. return false;
  3553. }
3554. [[fallthrough]];
  3555. case ISD::ROTL:
  3556. case ISD::SHL:
  3557. case ISD::AND:
  3558. case ISD::OR: {
  3559. BitPermutationSelector BPS(CurDAG);
  3560. if (SDNode *New = BPS.Select(N)) {
  3561. ReplaceNode(N, New);
  3562. return true;
  3563. }
  3564. return false;
  3565. }
  3566. }
  3567. return false;
  3568. }
  3569. /// SelectCC - Select a comparison of the specified values with the specified
  3570. /// condition code, returning the CR# of the expression.
  3571. SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  3572. const SDLoc &dl, SDValue Chain) {
  3573. // Always select the LHS.
  3574. unsigned Opc;
  3575. if (LHS.getValueType() == MVT::i32) {
  3576. unsigned Imm;
  3577. if (CC == ISD::SETEQ || CC == ISD::SETNE) {
  3578. if (isInt32Immediate(RHS, Imm)) {
  3579. // SETEQ/SETNE comparison with 16-bit immediate, fold it.
  3580. if (isUInt<16>(Imm))
  3581. return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
  3582. getI32Imm(Imm & 0xFFFF, dl)),
  3583. 0);
  3584. // If this is a 16-bit signed immediate, fold it.
  3585. if (isInt<16>((int)Imm))
  3586. return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
  3587. getI32Imm(Imm & 0xFFFF, dl)),
  3588. 0);
  3589. // For non-equality comparisons, the default code would materialize the
  3590. // constant, then compare against it, like this:
  3591. // lis r2, 4660
  3592. // ori r2, r2, 22136
  3593. // cmpw cr0, r3, r2
  3594. // Since we are just comparing for equality, we can emit this instead:
  3595. // xoris r0,r3,0x1234
  3596. // cmplwi cr0,r0,0x5678
  3597. // beq cr0,L6
  3598. SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
  3599. getI32Imm(Imm >> 16, dl)), 0);
  3600. return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
  3601. getI32Imm(Imm & 0xFFFF, dl)), 0);
  3602. }
  3603. Opc = PPC::CMPLW;
  3604. } else if (ISD::isUnsignedIntSetCC(CC)) {
  3605. if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
  3606. return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
  3607. getI32Imm(Imm & 0xFFFF, dl)), 0);
  3608. Opc = PPC::CMPLW;
  3609. } else {
  3610. int16_t SImm;
  3611. if (isIntS16Immediate(RHS, SImm))
  3612. return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
  3613. getI32Imm((int)SImm & 0xFFFF,
  3614. dl)),
  3615. 0);
  3616. Opc = PPC::CMPW;
  3617. }
  3618. } else if (LHS.getValueType() == MVT::i64) {
  3619. uint64_t Imm;
  3620. if (CC == ISD::SETEQ || CC == ISD::SETNE) {
  3621. if (isInt64Immediate(RHS.getNode(), Imm)) {
  3622. // SETEQ/SETNE comparison with 16-bit immediate, fold it.
  3623. if (isUInt<16>(Imm))
  3624. return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
  3625. getI32Imm(Imm & 0xFFFF, dl)),
  3626. 0);
  3627. // If this is a 16-bit signed immediate, fold it.
  3628. if (isInt<16>(Imm))
  3629. return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
  3630. getI32Imm(Imm & 0xFFFF, dl)),
  3631. 0);
  3632. // For non-equality comparisons, the default code would materialize the
  3633. // constant, then compare against it, like this:
  3634. // lis r2, 4660
  3635. // ori r2, r2, 22136
  3636. // cmpd cr0, r3, r2
  3637. // Since we are just comparing for equality, we can emit this instead:
  3638. // xoris r0,r3,0x1234
  3639. // cmpldi cr0,r0,0x5678
  3640. // beq cr0,L6
  3641. if (isUInt<32>(Imm)) {
  3642. SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
  3643. getI64Imm(Imm >> 16, dl)), 0);
  3644. return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
  3645. getI64Imm(Imm & 0xFFFF, dl)),
  3646. 0);
  3647. }
  3648. }
  3649. Opc = PPC::CMPLD;
  3650. } else if (ISD::isUnsignedIntSetCC(CC)) {
  3651. if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
  3652. return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
  3653. getI64Imm(Imm & 0xFFFF, dl)), 0);
  3654. Opc = PPC::CMPLD;
  3655. } else {
  3656. int16_t SImm;
  3657. if (isIntS16Immediate(RHS, SImm))
  3658. return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
  3659. getI64Imm(SImm & 0xFFFF, dl)),
  3660. 0);
  3661. Opc = PPC::CMPD;
  3662. }
  3663. } else if (LHS.getValueType() == MVT::f32) {
  3664. if (Subtarget->hasSPE()) {
  3665. switch (CC) {
  3666. default:
  3667. case ISD::SETEQ:
  3668. case ISD::SETNE:
  3669. Opc = PPC::EFSCMPEQ;
  3670. break;
  3671. case ISD::SETLT:
  3672. case ISD::SETGE:
  3673. case ISD::SETOLT:
  3674. case ISD::SETOGE:
  3675. case ISD::SETULT:
  3676. case ISD::SETUGE:
  3677. Opc = PPC::EFSCMPLT;
  3678. break;
  3679. case ISD::SETGT:
  3680. case ISD::SETLE:
  3681. case ISD::SETOGT:
  3682. case ISD::SETOLE:
  3683. case ISD::SETUGT:
  3684. case ISD::SETULE:
  3685. Opc = PPC::EFSCMPGT;
  3686. break;
  3687. }
  3688. } else
  3689. Opc = PPC::FCMPUS;
  3690. } else if (LHS.getValueType() == MVT::f64) {
  3691. if (Subtarget->hasSPE()) {
  3692. switch (CC) {
  3693. default:
  3694. case ISD::SETEQ:
  3695. case ISD::SETNE:
  3696. Opc = PPC::EFDCMPEQ;
  3697. break;
  3698. case ISD::SETLT:
  3699. case ISD::SETGE:
  3700. case ISD::SETOLT:
  3701. case ISD::SETOGE:
  3702. case ISD::SETULT:
  3703. case ISD::SETUGE:
  3704. Opc = PPC::EFDCMPLT;
  3705. break;
  3706. case ISD::SETGT:
  3707. case ISD::SETLE:
  3708. case ISD::SETOGT:
  3709. case ISD::SETOLE:
  3710. case ISD::SETUGT:
  3711. case ISD::SETULE:
  3712. Opc = PPC::EFDCMPGT;
  3713. break;
  3714. }
  3715. } else
  3716. Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
  3717. } else {
  3718. assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
  3719. assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
  3720. Opc = PPC::XSCMPUQP;
  3721. }
  3722. if (Chain)
  3723. return SDValue(
  3724. CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
  3725. 0);
  3726. else
  3727. return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
  3728. }
  3729. static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
  3730. const PPCSubtarget *Subtarget) {
3731. // For SPE instructions, the result is in the GT bit of the CR.
  3732. bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
  3733. switch (CC) {
  3734. case ISD::SETUEQ:
  3735. case ISD::SETONE:
  3736. case ISD::SETOLE:
  3737. case ISD::SETOGE:
  3738. llvm_unreachable("Should be lowered by legalize!");
  3739. default: llvm_unreachable("Unknown condition!");
  3740. case ISD::SETOEQ:
  3741. case ISD::SETEQ:
  3742. return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
  3743. case ISD::SETUNE:
  3744. case ISD::SETNE:
  3745. return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
  3746. case ISD::SETOLT:
  3747. case ISD::SETLT:
  3748. return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
  3749. case ISD::SETULE:
  3750. case ISD::SETLE:
  3751. return PPC::PRED_LE;
  3752. case ISD::SETOGT:
  3753. case ISD::SETGT:
  3754. return PPC::PRED_GT;
  3755. case ISD::SETUGE:
  3756. case ISD::SETGE:
  3757. return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
  3758. case ISD::SETO: return PPC::PRED_NU;
  3759. case ISD::SETUO: return PPC::PRED_UN;
  3760. // These two are invalid for floating point. Assume we have int.
  3761. case ISD::SETULT: return PPC::PRED_LT;
  3762. case ISD::SETUGT: return PPC::PRED_GT;
  3763. }
  3764. }
  3765. /// getCRIdxForSetCC - Return the index of the condition register field
  3766. /// associated with the SetCC condition, and whether or not the field is
  3767. /// treated as inverted. That is, lt = 0; ge = 0 inverted.
  3768. static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
  3769. Invert = false;
  3770. switch (CC) {
  3771. default: llvm_unreachable("Unknown condition!");
  3772. case ISD::SETOLT:
  3773. case ISD::SETLT: return 0; // Bit #0 = SETOLT
  3774. case ISD::SETOGT:
  3775. case ISD::SETGT: return 1; // Bit #1 = SETOGT
  3776. case ISD::SETOEQ:
  3777. case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
  3778. case ISD::SETUO: return 3; // Bit #3 = SETUO
  3779. case ISD::SETUGE:
  3780. case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
  3781. case ISD::SETULE:
  3782. case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
  3783. case ISD::SETUNE:
  3784. case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
  3785. case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
  3786. case ISD::SETUEQ:
  3787. case ISD::SETOGE:
  3788. case ISD::SETOLE:
  3789. case ISD::SETONE:
  3790. llvm_unreachable("Invalid branch code: should be expanded by legalize");
  3791. // These are invalid for floating point. Assume integer.
  3792. case ISD::SETULT: return 0;
  3793. case ISD::SETUGT: return 1;
  3794. }
  3795. }
  3796. // getVCmpInst: return the vector compare instruction for the specified
3797. // vector type and condition code. Since this is AltiVec-specific code, we
3798. // only support the AltiVec types (v16i8, v8i16, v4i32, v2i64, v1i128,
  3799. // and v4f32).
  3800. static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
  3801. bool HasVSX, bool &Swap, bool &Negate) {
  3802. Swap = false;
  3803. Negate = false;
  3804. if (VecVT.isFloatingPoint()) {
  3805. /* Handle some cases by swapping input operands. */
  3806. switch (CC) {
  3807. case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
  3808. case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
  3809. case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
  3810. case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
  3811. case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
  3812. case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
  3813. default: break;
  3814. }
  3815. /* Handle some cases by negating the result. */
  3816. switch (CC) {
  3817. case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
  3818. case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
  3819. case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
  3820. case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
  3821. default: break;
  3822. }
  3823. /* We have instructions implementing the remaining cases. */
  3824. switch (CC) {
  3825. case ISD::SETEQ:
  3826. case ISD::SETOEQ:
  3827. if (VecVT == MVT::v4f32)
  3828. return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
  3829. else if (VecVT == MVT::v2f64)
  3830. return PPC::XVCMPEQDP;
  3831. break;
  3832. case ISD::SETGT:
  3833. case ISD::SETOGT:
  3834. if (VecVT == MVT::v4f32)
  3835. return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
  3836. else if (VecVT == MVT::v2f64)
  3837. return PPC::XVCMPGTDP;
  3838. break;
  3839. case ISD::SETGE:
  3840. case ISD::SETOGE:
  3841. if (VecVT == MVT::v4f32)
  3842. return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
  3843. else if (VecVT == MVT::v2f64)
  3844. return PPC::XVCMPGEDP;
  3845. break;
  3846. default:
  3847. break;
  3848. }
  3849. llvm_unreachable("Invalid floating-point vector compare condition");
  3850. } else {
  3851. /* Handle some cases by swapping input operands. */
  3852. switch (CC) {
  3853. case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
  3854. case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
  3855. case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
  3856. case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
  3857. default: break;
  3858. }
  3859. /* Handle some cases by negating the result. */
  3860. switch (CC) {
  3861. case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
  3862. case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
  3863. case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
  3864. case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
  3865. default: break;
  3866. }
  3867. /* We have instructions implementing the remaining cases. */
  3868. switch (CC) {
  3869. case ISD::SETEQ:
  3870. case ISD::SETUEQ:
  3871. if (VecVT == MVT::v16i8)
  3872. return PPC::VCMPEQUB;
  3873. else if (VecVT == MVT::v8i16)
  3874. return PPC::VCMPEQUH;
  3875. else if (VecVT == MVT::v4i32)
  3876. return PPC::VCMPEQUW;
  3877. else if (VecVT == MVT::v2i64)
  3878. return PPC::VCMPEQUD;
  3879. else if (VecVT == MVT::v1i128)
  3880. return PPC::VCMPEQUQ;
  3881. break;
  3882. case ISD::SETGT:
  3883. if (VecVT == MVT::v16i8)
  3884. return PPC::VCMPGTSB;
  3885. else if (VecVT == MVT::v8i16)
  3886. return PPC::VCMPGTSH;
  3887. else if (VecVT == MVT::v4i32)
  3888. return PPC::VCMPGTSW;
  3889. else if (VecVT == MVT::v2i64)
  3890. return PPC::VCMPGTSD;
  3891. else if (VecVT == MVT::v1i128)
  3892. return PPC::VCMPGTSQ;
  3893. break;
  3894. case ISD::SETUGT:
  3895. if (VecVT == MVT::v16i8)
  3896. return PPC::VCMPGTUB;
  3897. else if (VecVT == MVT::v8i16)
  3898. return PPC::VCMPGTUH;
  3899. else if (VecVT == MVT::v4i32)
  3900. return PPC::VCMPGTUW;
  3901. else if (VecVT == MVT::v2i64)
  3902. return PPC::VCMPGTUD;
  3903. else if (VecVT == MVT::v1i128)
  3904. return PPC::VCMPGTUQ;
  3905. break;
  3906. default:
  3907. break;
  3908. }
  3909. llvm_unreachable("Invalid integer vector compare condition");
  3910. }
  3911. }
  3912. bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  3913. SDLoc dl(N);
  3914. unsigned Imm;
  3915. bool IsStrict = N->isStrictFPOpcode();
  3916. ISD::CondCode CC =
  3917. cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
  3918. EVT PtrVT =
  3919. CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  3920. bool isPPC64 = (PtrVT == MVT::i64);
  3921. SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
  3922. SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
  3923. SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
  3924. if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
  3925. // We can codegen setcc op, imm very efficiently compared to a brcond.
  3926. // Check for those cases here.
  3927. // setcc op, 0
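// Each condition below is handled with a short branch-free bit trick on the
// 32-bit value, avoiding a compare followed by a CR-to-GPR transfer.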
  3928. if (Imm == 0) {
  3929. SDValue Op = LHS;
  3930. switch (CC) {
  3931. default: break;
  3932. case ISD::SETEQ: {
  3933. Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
  3934. SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
  3935. getI32Imm(31, dl) };
  3936. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3937. return true;
  3938. }
  3939. case ISD::SETNE: {
  3940. if (isPPC64) break;
  3941. SDValue AD =
  3942. SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  3943. Op, getI32Imm(~0U, dl)), 0);
  3944. CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
  3945. return true;
  3946. }
  3947. case ISD::SETLT: {
  3948. SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
  3949. getI32Imm(31, dl) };
  3950. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3951. return true;
  3952. }
  3953. case ISD::SETGT: {
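// op > 0 (signed) exactly when the sign bit of ((-op) & ~op) is set: a
// negative op clears the sign bit of ~op, and zero negates to zero. The
// rlwinm below extracts that sign bit.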
  3954. SDValue T =
  3955. SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
  3956. T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
  3957. SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
  3958. getI32Imm(31, dl) };
  3959. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3960. return true;
  3961. }
  3962. }
  3963. } else if (Imm == ~0U) { // setcc op, -1
  3964. SDValue Op = LHS;
  3965. switch (CC) {
  3966. default: break;
  3967. case ISD::SETEQ:
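// op == -1 exactly when op + 1 carries out of 32 bits; the addic records
// that carry and the addze adds it to a zero, leaving the 0/1 result.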
  3968. if (isPPC64) break;
  3969. Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  3970. Op, getI32Imm(1, dl)), 0);
  3971. CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
  3972. SDValue(CurDAG->getMachineNode(PPC::LI, dl,
  3973. MVT::i32,
  3974. getI32Imm(0, dl)),
  3975. 0), Op.getValue(1));
  3976. return true;
  3977. case ISD::SETNE: {
  3978. if (isPPC64) break;
  3979. Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
  3980. SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  3981. Op, getI32Imm(~0U, dl));
  3982. CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
  3983. SDValue(AD, 1));
  3984. return true;
  3985. }
  3986. case ISD::SETLT: {
  3987. SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
  3988. getI32Imm(1, dl)), 0);
  3989. SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
  3990. Op), 0);
  3991. SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
  3992. getI32Imm(31, dl) };
  3993. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3994. return true;
  3995. }
  3996. case ISD::SETGT: {
  3997. SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
  3998. getI32Imm(31, dl) };
  3999. Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  4000. CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
  4001. return true;
  4002. }
  4003. }
  4004. }
  4005. }
4006. // AltiVec vector compare instructions do not set any CR register by default,
4007. // and vector compare operations return the same type as the operands.
  4008. if (!IsStrict && LHS.getValueType().isVector()) {
  4009. if (Subtarget->hasSPE())
  4010. return false;
  4011. EVT VecVT = LHS.getValueType();
  4012. bool Swap, Negate;
  4013. unsigned int VCmpInst =
  4014. getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
  4015. if (Swap)
  4016. std::swap(LHS, RHS);
  4017. EVT ResVT = VecVT.changeVectorElementTypeToInteger();
  4018. if (Negate) {
  4019. SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
  4020. CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
  4021. ResVT, VCmp, VCmp);
  4022. return true;
  4023. }
  4024. CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
  4025. return true;
  4026. }
  4027. if (Subtarget->useCRBits())
  4028. return false;
  4029. bool Inv;
  4030. unsigned Idx = getCRIdxForSetCC(CC, Inv);
  4031. SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
  4032. if (IsStrict)
  4033. CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
  4034. SDValue IntCR;
4035. // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that index.
4036. // The correct compare instruction is already set by SelectCC().
  4037. if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
  4038. Idx = 1;
  4039. }
  4040. // Force the ccreg into CR7.
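// mfocrf places CR7 in the low four bits of the GPR; the rlwinm below rotates
// the selected CR bit into the least-significant position and masks off the
// rest (with an extra xori when the bit is to be inverted).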
  4041. SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
  4042. SDValue InFlag; // Null incoming flag value.
  4043. CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
  4044. InFlag).getValue(1);
  4045. IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
  4046. CCReg), 0);
  4047. SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
  4048. getI32Imm(31, dl), getI32Imm(31, dl) };
  4049. if (!Inv) {
  4050. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4051. return true;
  4052. }
  4053. // Get the specified bit.
  4054. SDValue Tmp =
  4055. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  4056. CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
  4057. return true;
  4058. }
  4059. /// Does this node represent a load/store node whose address can be represented
4060. /// with a register plus an immediate that's a multiple of \p Val?
  4061. bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
  4062. LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
  4063. StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
  4064. MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
  4065. SDValue AddrOp;
  4066. if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
  4067. AddrOp = N->getOperand(1);
  4068. else if (STN)
  4069. AddrOp = STN->getOperand(2);
4070. // If the address points to a frame object or a frame object with an offset,
  4071. // we need to check the object alignment.
  4072. short Imm = 0;
  4073. if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
  4074. AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
  4075. AddrOp)) {
4076. // If op0 is a frame index that is under-aligned, we can't do it either,
  4077. // because it is translated to r31 or r1 + slot + offset. We won't know the
  4078. // slot number until the stack frame is finalized.
  4079. const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
  4080. unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
  4081. if ((SlotAlign % Val) != 0)
  4082. return false;
4083. // If we have an offset, we need a further check on the offset.
  4084. if (AddrOp.getOpcode() != ISD::ADD)
  4085. return true;
  4086. }
  4087. if (AddrOp.getOpcode() == ISD::ADD)
  4088. return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
  4089. // If the address comes from the outside, the offset will be zero.
  4090. return AddrOp.getOpcode() == ISD::CopyFromReg;
  4091. }
  4092. void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  4093. // Transfer memoperands.
  4094. MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  4095. CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
  4096. }
  4097. static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
  4098. bool &NeedSwapOps, bool &IsUnCmp) {
  4099. assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
  4100. SDValue LHS = N->getOperand(0);
  4101. SDValue RHS = N->getOperand(1);
  4102. SDValue TrueRes = N->getOperand(2);
  4103. SDValue FalseRes = N->getOperand(3);
  4104. ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
  4105. if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
  4106. N->getSimpleValueType(0) != MVT::i32))
  4107. return false;
  4108. // We are looking for any of:
  4109. // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  4110. // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  4111. // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
  4112. // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
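// Each of these shapes computes the three-way result sign(lhs - rhs) as
// -1/0/1, which is exactly what the ISA 3.0 setb instruction produces from a
// CR field.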
  4113. int64_t TrueResVal = TrueConst->getSExtValue();
  4114. if ((TrueResVal < -1 || TrueResVal > 1) ||
  4115. (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
  4116. (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
  4117. (TrueResVal == 0 &&
  4118. (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
  4119. return false;
  4120. SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
  4121. ? FalseRes
  4122. : FalseRes.getOperand(0);
  4123. bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
  4124. if (SetOrSelCC.getOpcode() != ISD::SETCC &&
  4125. SetOrSelCC.getOpcode() != ISD::SELECT_CC)
  4126. return false;
4127. // Without this setb optimization, the outer SELECT_CC will be manually
4128. // selected to a SELECT_CC_I4/SELECT_CC_I8 pseudo, and the expand-isel-pseudos
4129. // pass then turns that pseudo into an isel instruction. When the result has
4130. // more than one use (e.g. it also feeds a zext/sext), this optimization only
4131. // replaces the isel with a setb and shows no significant gain, while setb has
4132. // a longer latency than the isel it replaces, so we should avoid it. In
4133. // addition, setb requires that the comparison be kept alive, which can block
4134. // a future opportunity to eliminate the comparison entirely.
  4135. if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
  4136. return false;
  4137. SDValue InnerLHS = SetOrSelCC.getOperand(0);
  4138. SDValue InnerRHS = SetOrSelCC.getOperand(1);
  4139. ISD::CondCode InnerCC =
  4140. cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
  4141. // If the inner comparison is a select_cc, make sure the true/false values are
  4142. // 1/-1 and canonicalize it if needed.
  4143. if (InnerIsSel) {
  4144. ConstantSDNode *SelCCTrueConst =
  4145. dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
  4146. ConstantSDNode *SelCCFalseConst =
  4147. dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
  4148. if (!SelCCTrueConst || !SelCCFalseConst)
  4149. return false;
  4150. int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
  4151. int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
  4152. // The values must be -1/1 (requiring a swap) or 1/-1.
  4153. if (SelCCTVal == -1 && SelCCFVal == 1) {
  4154. std::swap(InnerLHS, InnerRHS);
  4155. } else if (SelCCTVal != 1 || SelCCFVal != -1)
  4156. return false;
  4157. }
  4158. // Canonicalize unsigned case
  4159. if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
  4160. IsUnCmp = true;
  4161. InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
  4162. }
  4163. bool InnerSwapped = false;
  4164. if (LHS == InnerRHS && RHS == InnerLHS)
  4165. InnerSwapped = true;
  4166. else if (LHS != InnerLHS || RHS != InnerRHS)
  4167. return false;
  4168. switch (CC) {
  4169. // (select_cc lhs, rhs, 0, \
  4170. // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
  4171. case ISD::SETEQ:
  4172. if (!InnerIsSel)
  4173. return false;
  4174. if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
  4175. return false;
  4176. NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
  4177. break;
  4178. // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  4179. // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
  4180. // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
  4181. // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  4182. // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
  4183. // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
  4184. case ISD::SETULT:
  4185. if (!IsUnCmp && InnerCC != ISD::SETNE)
  4186. return false;
  4187. IsUnCmp = true;
  4188. [[fallthrough]];
  4189. case ISD::SETLT:
  4190. if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
  4191. (InnerCC == ISD::SETLT && InnerSwapped))
  4192. NeedSwapOps = (TrueResVal == 1);
  4193. else
  4194. return false;
  4195. break;
  4196. // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  4197. // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
  4198. // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
  4199. // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  4200. // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
  4201. // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
  4202. case ISD::SETUGT:
  4203. if (!IsUnCmp && InnerCC != ISD::SETNE)
  4204. return false;
  4205. IsUnCmp = true;
  4206. [[fallthrough]];
  4207. case ISD::SETGT:
  4208. if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
  4209. (InnerCC == ISD::SETGT && InnerSwapped))
  4210. NeedSwapOps = (TrueResVal == -1);
  4211. else
  4212. return false;
  4213. break;
  4214. default:
  4215. return false;
  4216. }
  4217. LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
  4218. LLVM_DEBUG(N->dump());
  4219. return true;
  4220. }
  4221. // Return true if it's a software square-root/divide operand.
  4222. static bool isSWTestOp(SDValue N) {
  4223. if (N.getOpcode() == PPCISD::FTSQRT)
  4224. return true;
  4225. if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
  4226. N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
  4227. return false;
  4228. switch (N.getConstantOperandVal(0)) {
  4229. case Intrinsic::ppc_vsx_xvtdivdp:
  4230. case Intrinsic::ppc_vsx_xvtdivsp:
  4231. case Intrinsic::ppc_vsx_xvtsqrtdp:
  4232. case Intrinsic::ppc_vsx_xvtsqrtsp:
  4233. return true;
  4234. }
  4235. return false;
  4236. }
  4237. bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
  4238. assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4239. // We are looking for the following patterns, where `truncate to i1` has the
4240. // same semantics as `and 1`.
  4241. // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
  4242. // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
  4243. // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
  4244. // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
  4245. // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
  4246. // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
  4247. // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
  4248. // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
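// The AND mask selects a single bit of the test intrinsic's result; the
// switch below maps masks 1/2/4/8 onto the UN/EQ/GT/LT predicates for setne
// (and their complements for seteq), so the branch can consume the CR field
// defined by the test instruction directly.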
  4249. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  4250. if (CC != ISD::SETEQ && CC != ISD::SETNE)
  4251. return false;
  4252. SDValue CmpRHS = N->getOperand(3);
  4253. if (!isa<ConstantSDNode>(CmpRHS) ||
  4254. cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
  4255. return false;
  4256. SDValue CmpLHS = N->getOperand(2);
  4257. if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
  4258. return false;
  4259. unsigned PCC = 0;
  4260. bool IsCCNE = CC == ISD::SETNE;
  4261. if (CmpLHS.getOpcode() == ISD::AND &&
  4262. isa<ConstantSDNode>(CmpLHS.getOperand(1)))
  4263. switch (CmpLHS.getConstantOperandVal(1)) {
  4264. case 1:
  4265. PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
  4266. break;
  4267. case 2:
  4268. PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
  4269. break;
  4270. case 4:
  4271. PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
  4272. break;
  4273. case 8:
  4274. PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
  4275. break;
  4276. default:
  4277. return false;
  4278. }
  4279. else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
  4280. CmpLHS.getValueType() == MVT::i1)
  4281. PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
  4282. if (PCC) {
  4283. SDLoc dl(N);
  4284. SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
  4285. N->getOperand(0)};
  4286. CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
  4287. return true;
  4288. }
  4289. return false;
  4290. }
  4291. bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
4292. // Sometimes the promoted value of the intrinsic is ANDed with some non-zero
4293. // value, for example when CR bits are disabled. If so, select the
4294. // loop_decrement intrinsic now.
  4295. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  4296. SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
  4297. if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(LHS.getOperand(1)) ||
  4298. isNullConstant(LHS.getOperand(1)))
  4299. return false;
  4300. if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
  4301. cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() !=
  4302. Intrinsic::loop_decrement)
  4303. return false;
  4304. if (!isa<ConstantSDNode>(RHS))
  4305. return false;
  4306. assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
  4307. "Counter decrement comparison is not EQ or NE");
  4308. SDValue OldDecrement = LHS.getOperand(0);
  4309. assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");
  4310. SDLoc DecrementLoc(OldDecrement);
  4311. SDValue ChainInput = OldDecrement.getOperand(0);
  4312. SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc)
  4313. : getI32Imm(1, DecrementLoc)};
  4314. unsigned DecrementOpcode =
  4315. Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
  4316. SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc,
  4317. MVT::i1, DecrementOps);
  4318. unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
  4319. bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val);
  4320. unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
  4321. ReplaceUses(LHS.getValue(0), LHS.getOperand(1));
  4322. CurDAG->RemoveDeadNode(LHS.getNode());
  4323. // Mark the old loop_decrement intrinsic as dead.
  4324. ReplaceUses(OldDecrement.getValue(1), ChainInput);
  4325. CurDAG->RemoveDeadNode(OldDecrement.getNode());
  4326. SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
  4327. ChainInput, N->getOperand(0));
  4328. CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0),
  4329. N->getOperand(4), Chain);
  4330. return true;
  4331. }
  4332. bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
  4333. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4334. unsigned Imm;
  4335. if (!isInt32Immediate(N->getOperand(1), Imm))
  4336. return false;
  4337. SDLoc dl(N);
  4338. SDValue Val = N->getOperand(0);
  4339. unsigned SH, MB, ME;
  4340. // If this is an and of a value rotated between 0 and 31 bits and then and'd
  4341. // with a mask, emit rlwinm
  4342. if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
  4343. Val = Val.getOperand(0);
  4344. SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
  4345. getI32Imm(ME, dl)};
  4346. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4347. return true;
  4348. }
4349. // If this is just a masked value where the input is not handled above, and
4350. // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm.
  4351. if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
  4352. SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
  4353. getI32Imm(ME, dl)};
  4354. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4355. return true;
  4356. }
  4357. // AND X, 0 -> 0, not "rlwinm 32".
  4358. if (Imm == 0) {
  4359. ReplaceUses(SDValue(N, 0), N->getOperand(1));
  4360. return true;
  4361. }
  4362. return false;
  4363. }
  4364. bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
  4365. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4366. uint64_t Imm64;
  4367. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
  4368. return false;
  4369. unsigned MB, ME;
  4370. if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
//                  MB  ME
// +----------------------+
// |xxxxxxxxxxx00011111000|
// +----------------------+
//  0         32         64
// We can only do it if MB is no less than 32 and MB <= ME, as RLWINM will
// replace the contents of [0 - 32) with [32 - 64) even if we didn't rotate
// it.
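// Illustrative example (assumed values, not taken from a test): for
//   (and i64:$x, 0x0000000000FF0000)
// isRunOfOnes64 gives MB = 40 and ME = 47, so we emit
//   rlwinm8 $dst, $x, 0, 8, 15
// which also clears bits [0, 32) as required because MB >= 32.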
  4379. SDLoc dl(N);
  4380. SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
  4381. getI64Imm(ME - 32, dl)};
  4382. CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
  4383. return true;
  4384. }
  4385. return false;
  4386. }
  4387. bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
  4388. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4389. uint64_t Imm64;
  4390. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
  4391. return false;
// Do nothing if it is a 16-bit imm, as the pattern in the .td file handles
// it well with "andi.".
  4394. if (isUInt<16>(Imm64))
  4395. return false;
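// Rough cost note (descriptive): a wrapped run-of-ones mask would otherwise
// need a multi-instruction constant materialization plus an AND, so two
// rotate-and-clear instructions are generally no worse and avoid the
// temporary register.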
  4396. SDLoc Loc(N);
  4397. SDValue Val = N->getOperand(0);
// Optimize with two rldicl instructions as follows:
// Add the missing bits on the left of the mask and check that the result is
// a wrapped run of ones, i.e.
// change pattern |0001111100000011111111|
// to             |1111111100000011111111|.
  4403. unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
  4404. if (NumOfLeadingZeros != 0)
  4405. Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
  4406. unsigned MB, ME;
  4407. if (!isRunOfOnes64(Imm64, MB, ME))
  4408. return false;
//         ME     MB                   MB-ME+63
// +----------------------+     +----------------------+
// |1111111100000011111111| ->  |0000001111111111111111|
// +----------------------+     +----------------------+
//  0                    63      0                    63
// There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
  4415. unsigned OnesOnLeft = ME + 1;
  4416. unsigned ZerosInBetween = (MB - ME + 63) & 63;
  4417. // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
  4418. // on the left the bits that are already zeros in the mask.
  4419. Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
  4420. getI64Imm(OnesOnLeft, Loc),
  4421. getI64Imm(ZerosInBetween, Loc)),
  4422. 0);
//        MB-ME+63                      ME     MB
// +----------------------+     +----------------------+
// |0000001111111111111111| ->  |0001111100000011111111|
// +----------------------+     +----------------------+
//  0                    63      0                    63
  4428. // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
  4429. // left the number of ones we previously added.
  4430. SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
  4431. getI64Imm(NumOfLeadingZeros, Loc)};
  4432. CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  4433. return true;
  4434. }
  4435. bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
  4436. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4437. unsigned Imm;
  4438. if (!isInt32Immediate(N->getOperand(1), Imm))
  4439. return false;
  4440. SDValue Val = N->getOperand(0);
  4441. unsigned Imm2;
  4442. // ISD::OR doesn't get all the bitfield insertion fun.
  4443. // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
  4444. // bitfield insert.
  4445. if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
  4446. return false;
  4447. // The idea here is to check whether this is equivalent to:
  4448. // (c1 & m) | (x & ~m)
  4449. // where m is a run-of-ones mask. The logic here is that, for each bit in
  4450. // c1 and c2:
  4451. // - if both are 1, then the output will be 1.
  4452. // - if both are 0, then the output will be 0.
  4453. // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
  4454. // come from x.
  4455. // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
  4456. // be 0.
  4457. // If that last condition is never the case, then we can form m from the
  4458. // bits that are the same between c1 and c2.
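// Illustrative example (assumed constants): with c1 = 0x0000FF00 and
// c2 = 0xFFFFFF00, ~(c1 ^ c2) = 0x0000FFFF is a run of ones and no bit has
// c1 = 1 with c2 = 0, so
//   (and (or $x, 0x0000FF00), 0xFFFFFF00)
// can be emitted as rlwimi $x, $c1, 0, 16, 31: insert c1 under the mask
// 0x0000FFFF and keep the remaining bits of $x.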
  4459. unsigned MB, ME;
  4460. if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
  4461. SDLoc dl(N);
  4462. SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
  4463. getI32Imm(MB, dl), getI32Imm(ME, dl)};
  4464. ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
  4465. return true;
  4466. }
  4467. return false;
  4468. }
  4469. bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
  4470. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4471. uint64_t Imm64;
  4472. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
  4473. return false;
  4474. // If this is a 64-bit zero-extension mask, emit rldicl.
  4475. unsigned MB = 64 - countTrailingOnes(Imm64);
  4476. unsigned SH = 0;
  4477. unsigned Imm;
  4478. SDValue Val = N->getOperand(0);
  4479. SDLoc dl(N);
  4480. if (Val.getOpcode() == ISD::ANY_EXTEND) {
  4481. auto Op0 = Val.getOperand(0);
  4482. if (Op0.getOpcode() == ISD::SRL &&
  4483. isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
  4484. auto ResultType = Val.getNode()->getValueType(0);
  4485. auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
  4486. SDValue IDVal(ImDef, 0);
  4487. Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
  4488. IDVal, Op0.getOperand(0),
  4489. getI32Imm(1, dl)),
  4490. 0);
  4491. SH = 64 - Imm;
  4492. }
  4493. }
  4494. // If the operand is a logical right shift, we can fold it into this
  4495. // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
  4496. // for n <= mb. The right shift is really a left rotate followed by a
  4497. // mask, and this mask is a more-restrictive sub-mask of the mask implied
  4498. // by the shift.
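// Illustrative example (assumed values): for
//   (and (srl i64:$x, 8), 0xFFFFFF)
// the mask gives MB = 40 and the shift gives SH = 64 - 8 = 56, so we emit
//   rldicl $dst, $x, 56, 40
// instead of a separate shift and mask.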
  4499. if (Val.getOpcode() == ISD::SRL &&
  4500. isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
  4501. assert(Imm < 64 && "Illegal shift amount");
  4502. Val = Val.getOperand(0);
  4503. SH = 64 - Imm;
  4504. }
  4505. SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
  4506. CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  4507. return true;
  4508. }
  4509. bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
  4510. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4511. uint64_t Imm64;
  4512. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
  4513. !isMask_64(~Imm64))
  4514. return false;
// If this is a negated 64-bit zero-extension mask,
// i.e. the immediate is a sequence of ones from the most significant side
// and all zeros for the remainder, we should use rldicr.
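// Illustrative example (assumed value): Imm64 = 0xFFFFFFFF00000000 has
// countTrailingOnes(~Imm64) = 32, so the ME operand (held in MB here) is 31
// and we emit
//   rldicr $dst, $src, 0, 31
// which keeps bits 0..31 (the high word) and clears the rest.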
  4518. unsigned MB = 63 - countTrailingOnes(~Imm64);
  4519. unsigned SH = 0;
  4520. SDLoc dl(N);
  4521. SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
  4522. CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
  4523. return true;
  4524. }
  4525. bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
  4526. assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
  4527. uint64_t Imm64;
  4528. unsigned MB, ME;
  4529. SDValue N0 = N->getOperand(0);
// We won't get fewer instructions if the imm is a 32-bit integer.
// rldimi requires the imm to have consecutive ones with zeros on both sides.
  4532. // Also, make sure the first Op has only one use, otherwise this may increase
  4533. // register pressure since rldimi is destructive.
  4534. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
  4535. isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
  4536. return false;
  4537. unsigned SH = 63 - ME;
  4538. SDLoc Dl(N);
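// (or $x, run-of-ones-mask) is an insert of all-one bits under that mask, so
// we rotate an all-ones register by SH = 63 - ME and insert it into $x. A
// sketch of the resulting sequence (register names are placeholders):
//   li     $ones, -1          ; materialized via selectI64Imm below
//   rldimi $x, $ones, SH, MB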
// Use selectI64Imm to materialize the -1 with an LI instruction instead of
// putting Imm64 into the node directly.
  4540. SDValue Ops[] = {
  4541. N->getOperand(0),
  4542. SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
  4543. getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
  4544. CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
  4545. return true;
  4546. }
  4547. // Select - Convert the specified operand from a target-independent to a
  4548. // target-specific node if it hasn't already been changed.
  4549. void PPCDAGToDAGISel::Select(SDNode *N) {
  4550. SDLoc dl(N);
  4551. if (N->isMachineOpcode()) {
  4552. N->setNodeId(-1);
  4553. return; // Already selected.
  4554. }
  4555. // In case any misguided DAG-level optimizations form an ADD with a
  4556. // TargetConstant operand, crash here instead of miscompiling (by selecting
  4557. // an r+r add instead of some kind of r+i add).
  4558. if (N->getOpcode() == ISD::ADD &&
  4559. N->getOperand(1).getOpcode() == ISD::TargetConstant)
  4560. llvm_unreachable("Invalid ADD with TargetConstant operand");
  4561. // Try matching complex bit permutations before doing anything else.
  4562. if (tryBitPermutation(N))
  4563. return;
  4564. // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
  4565. if (tryIntCompareInGPR(N))
  4566. return;
  4567. switch (N->getOpcode()) {
  4568. default: break;
  4569. case ISD::Constant:
  4570. if (N->getValueType(0) == MVT::i64) {
  4571. ReplaceNode(N, selectI64Imm(CurDAG, N));
  4572. return;
  4573. }
  4574. break;
  4575. case ISD::INTRINSIC_VOID: {
  4576. auto IntrinsicID = N->getConstantOperandVal(1);
  4577. if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
  4578. IntrinsicID != Intrinsic::ppc_trapd &&
  4579. IntrinsicID != Intrinsic::ppc_trap)
  4580. break;
  4581. unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw ||
  4582. IntrinsicID == Intrinsic::ppc_trapd)
  4583. ? PPC::TDI
  4584. : PPC::TWI;
  4585. SmallVector<SDValue, 4> OpsWithMD;
  4586. unsigned MDIndex;
  4587. if (IntrinsicID == Intrinsic::ppc_tdw ||
  4588. IntrinsicID == Intrinsic::ppc_tw) {
  4589. SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
  4590. int16_t SImmOperand2;
  4591. int16_t SImmOperand3;
  4592. int16_t SImmOperand4;
  4593. bool isOperand2IntS16Immediate =
  4594. isIntS16Immediate(N->getOperand(2), SImmOperand2);
  4595. bool isOperand3IntS16Immediate =
  4596. isIntS16Immediate(N->getOperand(3), SImmOperand3);
  4597. // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
  4598. // reg or imm + imm. The imm + imm form will be optimized to either an
  4599. // unconditional trap or a nop in a later pass.
  4600. if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
  4601. Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
  4602. else if (isOperand3IntS16Immediate)
  4603. // The 2nd and 3rd operands are reg + imm.
  4604. Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
  4605. else {
  4606. // The 2nd and 3rd operands are imm + reg.
  4607. bool isOperand4IntS16Immediate =
  4608. isIntS16Immediate(N->getOperand(4), SImmOperand4);
  4609. (void)isOperand4IntS16Immediate;
  4610. assert(isOperand4IntS16Immediate &&
  4611. "The 4th operand is not an Immediate");
  4612. // We need to flip the condition immediate TO.
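// The 5-bit TO field encodes, from its most-significant bit down, trap-if
// {lt, gt, eq, ult, ugt} for "rA <cond> rB". Swapping the two compare
// operands therefore exchanges the lt/gt pair and the ult/ugt pair while
// leaving eq alone, which is what the two bit swaps below do (sketch of the
// encoding; see the ISA definition of tw/twi for the authoritative layout).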
  4613. int16_t TO = int(SImmOperand4) & 0x1F;
// We swap the first and second bits of TO if they are not the same.
  4615. if ((TO & 0x1) != ((TO & 0x2) >> 1))
  4616. TO = (TO & 0x1) ? TO + 1 : TO - 1;
// We swap the fourth and fifth bits of TO if they are not the same.
  4618. if ((TO & 0x8) != ((TO & 0x10) >> 1))
  4619. TO = (TO & 0x8) ? TO + 8 : TO - 8;
  4620. Ops[0] = getI32Imm(TO, dl);
  4621. Ops[1] = N->getOperand(3);
  4622. Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
  4623. }
  4624. OpsWithMD = {Ops[0], Ops[1], Ops[2]};
  4625. MDIndex = 5;
  4626. } else {
  4627. OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)};
  4628. MDIndex = 3;
  4629. }
  4630. if (N->getNumOperands() > MDIndex) {
  4631. SDValue MDV = N->getOperand(MDIndex);
  4632. const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
  4633. assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!");
  4634. assert((isa<MDString>(MD->getOperand(0)) && cast<MDString>(
  4635. MD->getOperand(0))->getString().equals("ppc-trap-reason"))
  4636. && "Unsupported annotation data type!");
  4637. for (unsigned i = 1; i < MD->getNumOperands(); i++) {
  4638. assert(isa<MDString>(MD->getOperand(i)) &&
  4639. "Invalid data type for annotation ppc-trap-reason!");
  4640. OpsWithMD.push_back(
  4641. getI32Imm(std::stoi(cast<MDString>(
  4642. MD->getOperand(i))->getString().str()), dl));
  4643. }
  4644. }
  4645. OpsWithMD.push_back(N->getOperand(0)); // chain
  4646. CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD);
  4647. return;
  4648. }
  4649. case ISD::INTRINSIC_WO_CHAIN: {
  4650. // We emit the PPC::FSELS instruction here because of type conflicts with
  4651. // the comparison operand. The FSELS instruction is defined to use an 8-byte
  4652. // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
  4653. // value for the comparison. When selecting through a .td file, a type
  4654. // error is raised. Must check this first so we never break on the
  4655. // !Subtarget->isISA3_1() check.
  4656. auto IntID = N->getConstantOperandVal(0);
  4657. if (IntID == Intrinsic::ppc_fsels) {
  4658. SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
  4659. CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
  4660. return;
  4661. }
  4662. if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
  4663. auto Pred = N->getConstantOperandVal(1);
  4664. unsigned Opcode =
  4665. IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
  4666. unsigned SubReg = 0;
  4667. unsigned ShiftVal = 0;
  4668. bool Reverse = false;
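// Descriptive note: the predicate operand selects which CR6 bit holds the
// answer and whether it must be inverted; even values test the bit directly,
// odd values test its complement (Reverse). ShiftVal records the bit's
// distance from the SO end of the CR field for the pre-Power10
// mfocrf/rlwinm path below.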
  4669. switch (Pred) {
  4670. case 0:
  4671. SubReg = PPC::sub_eq;
  4672. ShiftVal = 1;
  4673. break;
  4674. case 1:
  4675. SubReg = PPC::sub_eq;
  4676. ShiftVal = 1;
  4677. Reverse = true;
  4678. break;
  4679. case 2:
  4680. SubReg = PPC::sub_lt;
  4681. ShiftVal = 3;
  4682. break;
  4683. case 3:
  4684. SubReg = PPC::sub_lt;
  4685. ShiftVal = 3;
  4686. Reverse = true;
  4687. break;
  4688. case 4:
  4689. SubReg = PPC::sub_gt;
  4690. ShiftVal = 2;
  4691. break;
  4692. case 5:
  4693. SubReg = PPC::sub_gt;
  4694. ShiftVal = 2;
  4695. Reverse = true;
  4696. break;
  4697. case 6:
  4698. SubReg = PPC::sub_un;
  4699. break;
  4700. case 7:
  4701. SubReg = PPC::sub_un;
  4702. Reverse = true;
  4703. break;
  4704. }
  4705. EVT VTs[] = {MVT::v16i8, MVT::Glue};
  4706. SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
  4707. CurDAG->getTargetConstant(0, dl, MVT::i32)};
  4708. SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
  4709. SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
  4710. // On Power10, we can use SETBC[R]. On prior architectures, we have to use
  4711. // MFOCRF and shift/negate the value.
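// After MFOCRF the CR6 field sits in bits 24-27 (IBM bit numbering) of the
// GPR, so rotating left by 32 - (4 + ShiftVal) moves the selected bit into
// bit 31 and the 31,31 mask isolates it; Reverse is then handled with an
// extra xori.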
  4712. if (Subtarget->isISA3_1()) {
  4713. SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
  4714. SDValue CRBit = SDValue(
  4715. CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
  4716. CR6Reg, SubRegIdx, BCDOp.getValue(1)),
  4717. 0);
  4718. CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
  4719. CRBit);
  4720. } else {
  4721. SDValue Move =
  4722. SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
  4723. BCDOp.getValue(1)),
  4724. 0);
  4725. SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
  4726. getI32Imm(31, dl), getI32Imm(31, dl)};
  4727. if (!Reverse)
  4728. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4729. else {
  4730. SDValue Shift = SDValue(
  4731. CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  4732. CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
  4733. }
  4734. }
  4735. return;
  4736. }
  4737. if (!Subtarget->isISA3_1())
  4738. break;
  4739. unsigned Opcode = 0;
  4740. switch (IntID) {
  4741. default:
  4742. break;
  4743. case Intrinsic::ppc_altivec_vstribr_p:
  4744. Opcode = PPC::VSTRIBR_rec;
  4745. break;
  4746. case Intrinsic::ppc_altivec_vstribl_p:
  4747. Opcode = PPC::VSTRIBL_rec;
  4748. break;
  4749. case Intrinsic::ppc_altivec_vstrihr_p:
  4750. Opcode = PPC::VSTRIHR_rec;
  4751. break;
  4752. case Intrinsic::ppc_altivec_vstrihl_p:
  4753. Opcode = PPC::VSTRIHL_rec;
  4754. break;
  4755. }
  4756. if (!Opcode)
  4757. break;
  4758. // Generate the appropriate vector string isolate intrinsic to match.
  4759. EVT VTs[] = {MVT::v16i8, MVT::Glue};
  4760. SDValue VecStrOp =
  4761. SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
  4762. // Vector string isolate instructions update the EQ bit of CR6.
  4763. // Generate a SETBC instruction to extract the bit and place it in a GPR.
  4764. SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
  4765. SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
  4766. SDValue CRBit = SDValue(
  4767. CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
  4768. CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
  4769. 0);
  4770. CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
  4771. return;
  4772. }
  4773. case ISD::SETCC:
  4774. case ISD::STRICT_FSETCC:
  4775. case ISD::STRICT_FSETCCS:
  4776. if (trySETCC(N))
  4777. return;
  4778. break;
  4779. // These nodes will be transformed into GETtlsADDR32 node, which
  4780. // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
  4781. case PPCISD::ADDI_TLSLD_L_ADDR:
  4782. case PPCISD::ADDI_TLSGD_L_ADDR: {
  4783. const Module *Mod = MF->getFunction().getParent();
  4784. if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
  4785. !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
  4786. Mod->getPICLevel() == PICLevel::SmallPIC)
  4787. break;
  4788. // Attach global base pointer on GETtlsADDR32 node in order to
  4789. // generate secure plt code for TLS symbols.
  4790. getGlobalBaseReg();
  4791. } break;
  4792. case PPCISD::CALL: {
  4793. if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
  4794. !TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
  4795. !Subtarget->isTargetELF())
  4796. break;
  4797. SDValue Op = N->getOperand(1);
  4798. if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
  4799. if (GA->getTargetFlags() == PPCII::MO_PLT)
  4800. getGlobalBaseReg();
  4801. }
  4802. else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
  4803. if (ES->getTargetFlags() == PPCII::MO_PLT)
  4804. getGlobalBaseReg();
  4805. }
  4806. }
  4807. break;
  4808. case PPCISD::GlobalBaseReg:
  4809. ReplaceNode(N, getGlobalBaseReg());
  4810. return;
  4811. case ISD::FrameIndex:
  4812. selectFrameIndex(N, N);
  4813. return;
  4814. case PPCISD::MFOCRF: {
  4815. SDValue InFlag = N->getOperand(1);
  4816. ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
  4817. N->getOperand(0), InFlag));
  4818. return;
  4819. }
  4820. case PPCISD::READ_TIME_BASE:
  4821. ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
  4822. MVT::Other, N->getOperand(0)));
  4823. return;
  4824. case PPCISD::SRA_ADDZE: {
  4825. SDValue N0 = N->getOperand(0);
  4826. SDValue ShiftAmt =
  4827. CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
  4828. getConstantIntValue(), dl,
  4829. N->getValueType(0));
  4830. if (N->getValueType(0) == MVT::i64) {
  4831. SDNode *Op =
  4832. CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
  4833. N0, ShiftAmt);
  4834. CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
  4835. SDValue(Op, 1));
  4836. return;
  4837. } else {
  4838. assert(N->getValueType(0) == MVT::i32 &&
  4839. "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
  4840. SDNode *Op =
  4841. CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
  4842. N0, ShiftAmt);
  4843. CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
  4844. SDValue(Op, 1));
  4845. return;
  4846. }
  4847. }
  4848. case ISD::STORE: {
  4849. // Change TLS initial-exec D-form stores to X-form stores.
  4850. StoreSDNode *ST = cast<StoreSDNode>(N);
  4851. if (EnableTLSOpt && Subtarget->isELFv2ABI() &&
  4852. ST->getAddressingMode() != ISD::PRE_INC)
  4853. if (tryTLSXFormStore(ST))
  4854. return;
  4855. break;
  4856. }
  4857. case ISD::LOAD: {
  4858. // Handle preincrement loads.
  4859. LoadSDNode *LD = cast<LoadSDNode>(N);
  4860. EVT LoadedVT = LD->getMemoryVT();
  4861. // Normal loads are handled by code generated from the .td file.
  4862. if (LD->getAddressingMode() != ISD::PRE_INC) {
  4863. // Change TLS initial-exec D-form loads to X-form loads.
  4864. if (EnableTLSOpt && Subtarget->isELFv2ABI())
  4865. if (tryTLSXFormLoad(LD))
  4866. return;
  4867. break;
  4868. }
  4869. SDValue Offset = LD->getOffset();
  4870. if (Offset.getOpcode() == ISD::TargetConstant ||
  4871. Offset.getOpcode() == ISD::TargetGlobalAddress) {
  4872. unsigned Opcode;
  4873. bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
  4874. if (LD->getValueType(0) != MVT::i64) {
  4875. // Handle PPC32 integer and normal FP loads.
  4876. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
  4877. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4878. default: llvm_unreachable("Invalid PPC load type!");
  4879. case MVT::f64: Opcode = PPC::LFDU; break;
  4880. case MVT::f32: Opcode = PPC::LFSU; break;
  4881. case MVT::i32: Opcode = PPC::LWZU; break;
  4882. case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
  4883. case MVT::i1:
  4884. case MVT::i8: Opcode = PPC::LBZU; break;
  4885. }
  4886. } else {
  4887. assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
  4888. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
  4889. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4890. default: llvm_unreachable("Invalid PPC load type!");
  4891. case MVT::i64: Opcode = PPC::LDU; break;
  4892. case MVT::i32: Opcode = PPC::LWZU8; break;
  4893. case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
  4894. case MVT::i1:
  4895. case MVT::i8: Opcode = PPC::LBZU8; break;
  4896. }
  4897. }
  4898. SDValue Chain = LD->getChain();
  4899. SDValue Base = LD->getBasePtr();
  4900. SDValue Ops[] = { Offset, Base, Chain };
  4901. SDNode *MN = CurDAG->getMachineNode(
  4902. Opcode, dl, LD->getValueType(0),
  4903. PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
  4904. transferMemOperands(N, MN);
  4905. ReplaceNode(N, MN);
  4906. return;
  4907. } else {
  4908. unsigned Opcode;
  4909. bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
  4910. if (LD->getValueType(0) != MVT::i64) {
  4911. // Handle PPC32 integer and normal FP loads.
  4912. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
  4913. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4914. default: llvm_unreachable("Invalid PPC load type!");
  4915. case MVT::f64: Opcode = PPC::LFDUX; break;
  4916. case MVT::f32: Opcode = PPC::LFSUX; break;
  4917. case MVT::i32: Opcode = PPC::LWZUX; break;
  4918. case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
  4919. case MVT::i1:
  4920. case MVT::i8: Opcode = PPC::LBZUX; break;
  4921. }
  4922. } else {
  4923. assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
  4924. assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
  4925. "Invalid sext update load");
  4926. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4927. default: llvm_unreachable("Invalid PPC load type!");
  4928. case MVT::i64: Opcode = PPC::LDUX; break;
  4929. case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
  4930. case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
  4931. case MVT::i1:
  4932. case MVT::i8: Opcode = PPC::LBZUX8; break;
  4933. }
  4934. }
  4935. SDValue Chain = LD->getChain();
  4936. SDValue Base = LD->getBasePtr();
  4937. SDValue Ops[] = { Base, Offset, Chain };
  4938. SDNode *MN = CurDAG->getMachineNode(
  4939. Opcode, dl, LD->getValueType(0),
  4940. PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
  4941. transferMemOperands(N, MN);
  4942. ReplaceNode(N, MN);
  4943. return;
  4944. }
  4945. }
  4946. case ISD::AND:
  4947. // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
  4948. if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
  4949. tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
  4950. return;
  4951. // Other cases are autogenerated.
  4952. break;
  4953. case ISD::OR: {
  4954. if (N->getValueType(0) == MVT::i32)
  4955. if (tryBitfieldInsert(N))
  4956. return;
  4957. int16_t Imm;
  4958. if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
  4959. isIntS16Immediate(N->getOperand(1), Imm)) {
  4960. KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
  4961. // If this is equivalent to an add, then we can fold it with the
  4962. // FrameIndex calculation.
  4963. if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
  4964. selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
  4965. return;
  4966. }
  4967. }
// If this is an 'or' against an imm whose ones are consecutive with zeros on
// both sides, try to emit rldimi.
  4970. if (tryAsSingleRLDIMI(N))
  4971. return;
  4972. // OR with a 32-bit immediate can be handled by ori + oris
  4973. // without creating an immediate in a GPR.
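// Illustrative example (assumed value): Imm64 = 0x12345678 becomes
//   ori  $t,   $src, 0x5678
//   oris $dst, $t,   0x1234
// since ori ORs its 16-bit immediate into the low halfword and oris into the
// next halfword.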
  4974. uint64_t Imm64 = 0;
  4975. bool IsPPC64 = Subtarget->isPPC64();
  4976. if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
  4977. (Imm64 & ~0xFFFFFFFFuLL) == 0) {
// If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
  4979. uint64_t ImmHi = Imm64 >> 16;
  4980. uint64_t ImmLo = Imm64 & 0xFFFF;
  4981. if (ImmHi != 0 && ImmLo != 0) {
  4982. SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
  4983. N->getOperand(0),
  4984. getI16Imm(ImmLo, dl));
  4985. SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
  4986. CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
  4987. return;
  4988. }
  4989. }
  4990. // Other cases are autogenerated.
  4991. break;
  4992. }
  4993. case ISD::XOR: {
  4994. // XOR with a 32-bit immediate can be handled by xori + xoris
  4995. // without creating an immediate in a GPR.
  4996. uint64_t Imm64 = 0;
  4997. bool IsPPC64 = Subtarget->isPPC64();
  4998. if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
  4999. (Imm64 & ~0xFFFFFFFFuLL) == 0) {
// If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
  5001. uint64_t ImmHi = Imm64 >> 16;
  5002. uint64_t ImmLo = Imm64 & 0xFFFF;
  5003. if (ImmHi != 0 && ImmLo != 0) {
  5004. SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
  5005. N->getOperand(0),
  5006. getI16Imm(ImmLo, dl));
  5007. SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
  5008. CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
  5009. return;
  5010. }
  5011. }
  5012. break;
  5013. }
  5014. case ISD::ADD: {
  5015. int16_t Imm;
  5016. if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
  5017. isIntS16Immediate(N->getOperand(1), Imm)) {
  5018. selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
  5019. return;
  5020. }
  5021. break;
  5022. }
  5023. case ISD::SHL: {
  5024. unsigned Imm, SH, MB, ME;
  5025. if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
  5026. isRotateAndMask(N, Imm, true, SH, MB, ME)) {
  5027. SDValue Ops[] = { N->getOperand(0).getOperand(0),
  5028. getI32Imm(SH, dl), getI32Imm(MB, dl),
  5029. getI32Imm(ME, dl) };
  5030. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  5031. return;
  5032. }
  5033. // Other cases are autogenerated.
  5034. break;
  5035. }
  5036. case ISD::SRL: {
  5037. unsigned Imm, SH, MB, ME;
  5038. if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
  5039. isRotateAndMask(N, Imm, true, SH, MB, ME)) {
  5040. SDValue Ops[] = { N->getOperand(0).getOperand(0),
  5041. getI32Imm(SH, dl), getI32Imm(MB, dl),
  5042. getI32Imm(ME, dl) };
  5043. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  5044. return;
  5045. }
  5046. // Other cases are autogenerated.
  5047. break;
  5048. }
  5049. case ISD::MUL: {
  5050. SDValue Op1 = N->getOperand(1);
  5051. if (Op1.getOpcode() != ISD::Constant ||
  5052. (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
  5053. break;
  5054. // If the multiplier fits int16, we can handle it with mulli.
  5055. int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
  5056. unsigned Shift = countTrailingZeros<uint64_t>(Imm);
  5057. if (isInt<16>(Imm) || !Shift)
  5058. break;
// If the shifted value fits in int16, we can do this transformation:
// (mul X, c1 << c2) -> (rldicr (mulli X, c1), c2). We do this in ISel because
// DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
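// Illustrative example (assumed value): X * 320 has Imm = 320 = 5 << 6, so we
// emit
//   mulli  $t,   $x, 5
//   rldicr $dst, $t, 6, 57
// where the rldicr performs the shift left by 6.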
  5062. uint64_t ImmSh = Imm >> Shift;
  5063. if (!isInt<16>(ImmSh))
  5064. break;
  5065. uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
  5066. if (Op1.getValueType() == MVT::i64) {
  5067. SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
  5068. SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
  5069. N->getOperand(0), SDImm);
  5070. SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
  5071. getI32Imm(63 - Shift, dl)};
  5072. CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
  5073. return;
  5074. } else {
  5075. SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);
  5076. SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,
  5077. N->getOperand(0), SDImm);
  5078. SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
  5079. getI32Imm(0, dl), getI32Imm(31 - Shift, dl)};
  5080. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  5081. return;
  5082. }
  5083. break;
  5084. }
  5085. // FIXME: Remove this once the ANDI glue bug is fixed:
  5086. case PPCISD::ANDI_rec_1_EQ_BIT:
  5087. case PPCISD::ANDI_rec_1_GT_BIT: {
  5088. if (!ANDIGlueBug)
  5089. break;
  5090. EVT InVT = N->getOperand(0).getValueType();
  5091. assert((InVT == MVT::i64 || InVT == MVT::i32) &&
  5092. "Invalid input type for ANDI_rec_1_EQ_BIT");
  5093. unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
  5094. SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
  5095. N->getOperand(0),
  5096. CurDAG->getTargetConstant(1, dl, InVT)),
  5097. 0);
  5098. SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  5099. SDValue SRIdxVal = CurDAG->getTargetConstant(
  5100. N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
  5101. dl, MVT::i32);
  5102. CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
  5103. SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
  5104. return;
  5105. }
  5106. case ISD::SELECT_CC: {
  5107. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
  5108. EVT PtrVT =
  5109. CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  5110. bool isPPC64 = (PtrVT == MVT::i64);
  5111. // If this is a select of i1 operands, we'll pattern match it.
  5112. if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
  5113. break;
  5114. if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
  5115. bool NeedSwapOps = false;
  5116. bool IsUnCmp = false;
  5117. if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
  5118. SDValue LHS = N->getOperand(0);
  5119. SDValue RHS = N->getOperand(1);
  5120. if (NeedSwapOps)
  5121. std::swap(LHS, RHS);
// Make use of SelectCC to generate the comparison that sets the CR bits.
// For equality comparisons with one literal operand, SelectCC may not
// materialize the whole literal and instead use xoris to check the high
// half first; the resulting comparison then cannot exactly represent a
// GT/LT relationship. To avoid this we specify SETGT/SETUGT here instead
// of SETEQ.
  5128. SDValue GenCC =
  5129. SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
  5130. CurDAG->SelectNodeTo(
  5131. N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
  5132. N->getValueType(0), GenCC);
  5133. NumP9Setb++;
  5134. return;
  5135. }
  5136. }
  5137. // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
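// For (select_cc lhs, 0, 1, 0, setne) on i32, the sequence below computes
// (lhs != 0) without reading a CR field: addic of -1 sets CA = 0 only when
// lhs is zero (lhs + 0xFFFFFFFF carries otherwise), and the subfe of that
// result from lhs reduces to just CA.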
  5138. if (!isPPC64)
  5139. if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
  5140. if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
  5141. if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
  5142. if (N1C->isZero() && N3C->isZero() && N2C->getZExtValue() == 1ULL &&
  5143. CC == ISD::SETNE &&
  5144. // FIXME: Implement this optzn for PPC64.
  5145. N->getValueType(0) == MVT::i32) {
  5146. SDNode *Tmp =
  5147. CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  5148. N->getOperand(0), getI32Imm(~0U, dl));
  5149. CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
  5150. N->getOperand(0), SDValue(Tmp, 1));
  5151. return;
  5152. }
  5153. SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
  5154. if (N->getValueType(0) == MVT::i1) {
  5155. // An i1 select is: (c & t) | (!c & f).
  5156. bool Inv;
  5157. unsigned Idx = getCRIdxForSetCC(CC, Inv);
  5158. unsigned SRI;
  5159. switch (Idx) {
  5160. default: llvm_unreachable("Invalid CC index");
  5161. case 0: SRI = PPC::sub_lt; break;
  5162. case 1: SRI = PPC::sub_gt; break;
  5163. case 2: SRI = PPC::sub_eq; break;
  5164. case 3: SRI = PPC::sub_un; break;
  5165. }
  5166. SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
  5167. SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
  5168. CCBit, CCBit), 0);
  5169. SDValue C = Inv ? NotCCBit : CCBit,
  5170. NotC = Inv ? CCBit : NotCCBit;
  5171. SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
  5172. C, N->getOperand(2)), 0);
  5173. SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
  5174. NotC, N->getOperand(3)), 0);
  5175. CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
  5176. return;
  5177. }
  5178. unsigned BROpc =
  5179. getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
  5180. unsigned SelectCCOp;
  5181. if (N->getValueType(0) == MVT::i32)
  5182. SelectCCOp = PPC::SELECT_CC_I4;
  5183. else if (N->getValueType(0) == MVT::i64)
  5184. SelectCCOp = PPC::SELECT_CC_I8;
  5185. else if (N->getValueType(0) == MVT::f32) {
  5186. if (Subtarget->hasP8Vector())
  5187. SelectCCOp = PPC::SELECT_CC_VSSRC;
  5188. else if (Subtarget->hasSPE())
  5189. SelectCCOp = PPC::SELECT_CC_SPE4;
  5190. else
  5191. SelectCCOp = PPC::SELECT_CC_F4;
  5192. } else if (N->getValueType(0) == MVT::f64) {
  5193. if (Subtarget->hasVSX())
  5194. SelectCCOp = PPC::SELECT_CC_VSFRC;
  5195. else if (Subtarget->hasSPE())
  5196. SelectCCOp = PPC::SELECT_CC_SPE;
  5197. else
  5198. SelectCCOp = PPC::SELECT_CC_F8;
  5199. } else if (N->getValueType(0) == MVT::f128)
  5200. SelectCCOp = PPC::SELECT_CC_F16;
  5201. else if (Subtarget->hasSPE())
  5202. SelectCCOp = PPC::SELECT_CC_SPE;
  5203. else if (N->getValueType(0) == MVT::v2f64 ||
  5204. N->getValueType(0) == MVT::v2i64)
  5205. SelectCCOp = PPC::SELECT_CC_VSRC;
  5206. else
  5207. SelectCCOp = PPC::SELECT_CC_VRRC;
  5208. SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
  5209. getI32Imm(BROpc, dl) };
  5210. CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
  5211. return;
  5212. }
  5213. case ISD::VECTOR_SHUFFLE:
  5214. if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
  5215. N->getValueType(0) == MVT::v2i64)) {
  5216. ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  5217. SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
  5218. Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
  5219. unsigned DM[2];
  5220. for (int i = 0; i < 2; ++i)
  5221. if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
  5222. DM[i] = 0;
  5223. else
  5224. DM[i] = 1;
  5225. if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
  5226. Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
  5227. isa<LoadSDNode>(Op1.getOperand(0))) {
  5228. LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
  5229. SDValue Base, Offset;
  5230. if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
  5231. (LD->getMemoryVT() == MVT::f64 ||
  5232. LD->getMemoryVT() == MVT::i64) &&
  5233. SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
  5234. SDValue Chain = LD->getChain();
  5235. SDValue Ops[] = { Base, Offset, Chain };
  5236. MachineMemOperand *MemOp = LD->getMemOperand();
  5237. SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
  5238. N->getValueType(0), Ops);
  5239. CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
  5240. return;
  5241. }
  5242. }
  5243. // For little endian, we must swap the input operands and adjust
  5244. // the mask elements (reverse and invert them).
  5245. if (Subtarget->isLittleEndian()) {
  5246. std::swap(Op1, Op2);
  5247. unsigned tmp = DM[0];
  5248. DM[0] = 1 - DM[1];
  5249. DM[1] = 1 - tmp;
  5250. }
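// DM[1] | (DM[0] << 1) forms the two-bit DM field of xxpermdi: the high bit
// picks the doubleword of the first source for the result's first
// doubleword, and the low bit picks the doubleword of the second source for
// the result's second doubleword.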
  5251. SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
  5252. MVT::i32);
  5253. SDValue Ops[] = { Op1, Op2, DMV };
  5254. CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
  5255. return;
  5256. }
  5257. break;
  5258. case PPCISD::BDNZ:
  5259. case PPCISD::BDZ: {
  5260. bool IsPPC64 = Subtarget->isPPC64();
  5261. SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
  5262. CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
  5263. ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
  5264. : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
  5265. MVT::Other, Ops);
  5266. return;
  5267. }
  5268. case PPCISD::COND_BRANCH: {
  5269. // Op #0 is the Chain.
  5270. // Op #1 is the PPC::PRED_* number.
  5271. // Op #2 is the CR#
  5272. // Op #3 is the Dest MBB
  5273. // Op #4 is the Flag.
  5274. // Prevent PPC::PRED_* from being selected into LI.
  5275. unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  5276. if (EnableBranchHint)
  5277. PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
  5278. SDValue Pred = getI32Imm(PCC, dl);
  5279. SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
  5280. N->getOperand(0), N->getOperand(4) };
  5281. CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
  5282. return;
  5283. }
  5284. case ISD::BR_CC: {
  5285. if (tryFoldSWTestBRCC(N))
  5286. return;
  5287. if (trySelectLoopCountIntrinsic(N))
  5288. return;
  5289. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  5290. unsigned PCC =
  5291. getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
  5292. if (N->getOperand(2).getValueType() == MVT::i1) {
  5293. unsigned Opc;
  5294. bool Swap;
  5295. switch (PCC) {
  5296. default: llvm_unreachable("Unexpected Boolean-operand predicate");
  5297. case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
  5298. case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
  5299. case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
  5300. case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
  5301. case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
  5302. case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
  5303. }
  5304. // A signed comparison of i1 values produces the opposite result to an
  5305. // unsigned one if the condition code includes less-than or greater-than.
  5306. // This is because 1 is the most negative signed i1 number and the most
  5307. // positive unsigned i1 number. The CR-logical operations used for such
  5308. // comparisons are non-commutative so for signed comparisons vs. unsigned
  5309. // ones, the input operands just need to be swapped.
  5310. if (ISD::isSignedIntSetCC(CC))
  5311. Swap = !Swap;
  5312. SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
  5313. N->getOperand(Swap ? 3 : 2),
  5314. N->getOperand(Swap ? 2 : 3)), 0);
  5315. CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
  5316. N->getOperand(0));
  5317. return;
  5318. }
  5319. if (EnableBranchHint)
  5320. PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
  5321. SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
  5322. SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
  5323. N->getOperand(4), N->getOperand(0) };
  5324. CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
  5325. return;
  5326. }
  5327. case ISD::BRIND: {
  5328. // FIXME: Should custom lower this.
  5329. SDValue Chain = N->getOperand(0);
  5330. SDValue Target = N->getOperand(1);
  5331. unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
  5332. unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
  5333. Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
  5334. Chain), 0);
  5335. CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
  5336. return;
  5337. }
  5338. case PPCISD::TOC_ENTRY: {
  5339. const bool isPPC64 = Subtarget->isPPC64();
  5340. const bool isELFABI = Subtarget->isSVR4ABI();
  5341. const bool isAIXABI = Subtarget->isAIXABI();
// PowerPC only supports small, medium, and large code models.
  5343. const CodeModel::Model CModel = TM.getCodeModel();
  5344. assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
  5345. "PowerPC doesn't support tiny or kernel code models.");
  5346. if (isAIXABI && CModel == CodeModel::Medium)
  5347. report_fatal_error("Medium code model is not supported on AIX.");
  5348. // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
  5349. // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
  5350. // small code model, we need to check for a toc-data attribute.
  5351. if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
  5352. break;
  5353. auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
  5354. EVT OperandTy) {
  5355. SDValue GA = TocEntry->getOperand(0);
  5356. SDValue TocBase = TocEntry->getOperand(1);
  5357. SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
  5358. transferMemOperands(TocEntry, MN);
  5359. ReplaceNode(TocEntry, MN);
  5360. };
  5361. // Handle 32-bit small code model.
  5362. if (!isPPC64 && CModel == CodeModel::Small) {
// Transform the ISD::TOC_ENTRY node to the passed-in opcode, either
// PPC::ADDItoc or PPC::LWZtoc.
  5365. if (isELFABI) {
  5366. assert(TM.isPositionIndependent() &&
  5367. "32-bit ELF can only have TOC entries in position independent"
  5368. " code.");
  5369. // 32-bit ELF always uses a small code model toc access.
  5370. replaceWith(PPC::LWZtoc, N, MVT::i32);
  5371. return;
  5372. }
  5373. assert(isAIXABI && "ELF ABI already handled");
  5374. if (hasTocDataAttr(N->getOperand(0),
  5375. CurDAG->getDataLayout().getPointerSize())) {
  5376. replaceWith(PPC::ADDItoc, N, MVT::i32);
  5377. return;
  5378. }
  5379. replaceWith(PPC::LWZtoc, N, MVT::i32);
  5380. return;
  5381. }
  5382. if (isPPC64 && CModel == CodeModel::Small) {
  5383. assert(isAIXABI && "ELF ABI handled in common SelectCode");
  5384. if (hasTocDataAttr(N->getOperand(0),
  5385. CurDAG->getDataLayout().getPointerSize())) {
  5386. replaceWith(PPC::ADDItoc8, N, MVT::i64);
  5387. return;
  5388. }
// Break if it doesn't have the toc-data attribute and proceed with the
// common SelectCode.
  5391. break;
  5392. }
  5393. assert(CModel != CodeModel::Small && "All small code models handled.");
  5394. assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
  5395. " ELF/AIX or 32-bit AIX in the following.");
  5396. // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
  5397. // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
  5398. // generate two instructions as described below. The first source operand
  5399. // is a symbol reference. If it must be toc-referenced according to
  5400. // Subtarget, we generate:
  5401. // [32-bit AIX]
  5402. // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
  5403. // [64-bit ELF/AIX]
  5404. // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
  5405. // Otherwise we generate:
  5406. // ADDItocL(ADDIStocHA8(%x2, @sym), @sym)
  5407. SDValue GA = N->getOperand(0);
  5408. SDValue TOCbase = N->getOperand(1);
  5409. EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  5410. SDNode *Tmp = CurDAG->getMachineNode(
  5411. isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
  5412. if (PPCLowering->isAccessedAsGotIndirect(GA)) {
  5413. // If it is accessed as got-indirect, we need an extra LWZ/LD to load
  5414. // the address.
  5415. SDNode *MN = CurDAG->getMachineNode(
  5416. isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
  5417. transferMemOperands(N, MN);
  5418. ReplaceNode(N, MN);
  5419. return;
  5420. }
  5421. // Build the address relative to the TOC-pointer.
  5422. ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
  5423. SDValue(Tmp, 0), GA));
  5424. return;
  5425. }
  5426. case PPCISD::PPC32_PICGOT:
  5427. // Generate a PIC-safe GOT reference.
  5428. assert(Subtarget->is32BitELFABI() &&
  5429. "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
  5430. CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
  5431. PPCLowering->getPointerTy(CurDAG->getDataLayout()),
  5432. MVT::i32);
  5433. return;
  5434. case PPCISD::VADD_SPLAT: {
  5435. // This expands into one of three sequences, depending on whether
  5436. // the first operand is odd or even, positive or negative.
  5437. assert(isa<ConstantSDNode>(N->getOperand(0)) &&
  5438. isa<ConstantSDNode>(N->getOperand(1)) &&
  5439. "Invalid operand on VADD_SPLAT!");
  5440. int Elt = N->getConstantOperandVal(0);
  5441. int EltSize = N->getConstantOperandVal(1);
  5442. unsigned Opc1, Opc2, Opc3;
  5443. EVT VT;
  5444. if (EltSize == 1) {
  5445. Opc1 = PPC::VSPLTISB;
  5446. Opc2 = PPC::VADDUBM;
  5447. Opc3 = PPC::VSUBUBM;
  5448. VT = MVT::v16i8;
  5449. } else if (EltSize == 2) {
  5450. Opc1 = PPC::VSPLTISH;
  5451. Opc2 = PPC::VADDUHM;
  5452. Opc3 = PPC::VSUBUHM;
  5453. VT = MVT::v8i16;
  5454. } else {
  5455. assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
  5456. Opc1 = PPC::VSPLTISW;
  5457. Opc2 = PPC::VADDUWM;
  5458. Opc3 = PPC::VSUBUWM;
  5459. VT = MVT::v4i32;
  5460. }
  5461. if ((Elt & 1) == 0) {
  5462. // Elt is even, in the range [-32,-18] + [16,30].
  5463. //
  5464. // Convert: VADD_SPLAT elt, size
  5465. // Into: tmp = VSPLTIS[BHW] elt
  5466. // VADDU[BHW]M tmp, tmp
  5467. // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
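// For example (illustrative): elt = 20 with size 1 becomes
//   vspltisb $t,   10
//   vaddubm  $dst, $t, $t
// splatting 10 and doubling it to reach 20 in every byte.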
  5468. SDValue EltVal = getI32Imm(Elt >> 1, dl);
  5469. SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5470. SDValue TmpVal = SDValue(Tmp, 0);
  5471. ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
  5472. return;
  5473. } else if (Elt > 0) {
  5474. // Elt is odd and positive, in the range [17,31].
  5475. //
  5476. // Convert: VADD_SPLAT elt, size
  5477. // Into: tmp1 = VSPLTIS[BHW] elt-16
  5478. // tmp2 = VSPLTIS[BHW] -16
  5479. // VSUBU[BHW]M tmp1, tmp2
  5480. SDValue EltVal = getI32Imm(Elt - 16, dl);
  5481. SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5482. EltVal = getI32Imm(-16, dl);
  5483. SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5484. ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
  5485. SDValue(Tmp2, 0)));
  5486. return;
  5487. } else {
  5488. // Elt is odd and negative, in the range [-31,-17].
  5489. //
  5490. // Convert: VADD_SPLAT elt, size
  5491. // Into: tmp1 = VSPLTIS[BHW] elt+16
  5492. // tmp2 = VSPLTIS[BHW] -16
  5493. // VADDU[BHW]M tmp1, tmp2
  5494. SDValue EltVal = getI32Imm(Elt + 16, dl);
  5495. SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5496. EltVal = getI32Imm(-16, dl);
  5497. SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5498. ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
  5499. SDValue(Tmp2, 0)));
  5500. return;
  5501. }
  5502. }
  5503. case PPCISD::LD_SPLAT: {
// Here we want to handle splat loads of type v16i8 and v8i16 when there is
// no direct move; we don't need to use the stack for this case. If the
// target has direct move, we should be able to get the best selection in
// the .td file.
  5507. if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
  5508. break;
  5509. EVT Type = N->getValueType(0);
  5510. if (Type != MVT::v16i8 && Type != MVT::v8i16)
  5511. break;
// If the alignment of the load is 16 or bigger, we don't need the
// permuted mask to get the required value. The value must be element 0
// on a big-endian target, or element 7/15 on a little-endian target, in the
// result VSX register of the lvx instruction.
// Select the instruction in the .td file.
  5517. if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
  5518. isOffsetMultipleOf(N, 16))
  5519. break;
  5520. SDValue ZeroReg =
  5521. CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
  5522. Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
  5523. unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
  5524. // v16i8 LD_SPLAT addr
  5525. // ======>
  5526. // Mask = LVSR/LVSL 0, addr
  5527. // LoadLow = LVX 0, addr
  5528. // Perm = VPERM LoadLow, LoadLow, Mask
  5529. // Splat = VSPLTB 15/0, Perm
  5530. //
  5531. // v8i16 LD_SPLAT addr
  5532. // ======>
  5533. // Mask = LVSR/LVSL 0, addr
  5534. // LoadLow = LVX 0, addr
  5535. // LoadHigh = LVX (LI, 1), addr
  5536. // Perm = VPERM LoadLow, LoadHigh, Mask
  5537. // Splat = VSPLTH 7/0, Perm
  5538. unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
  5539. unsigned SplatElemIndex =
  5540. Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
  5541. SDNode *Mask = CurDAG->getMachineNode(
  5542. Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
  5543. N->getOperand(1));
  5544. SDNode *LoadLow =
  5545. CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
  5546. {ZeroReg, N->getOperand(1), N->getOperand(0)});
  5547. SDNode *LoadHigh = LoadLow;
  5548. if (Type == MVT::v8i16) {
  5549. LoadHigh = CurDAG->getMachineNode(
  5550. PPC::LVX, dl, MVT::v16i8, MVT::Other,
  5551. {SDValue(CurDAG->getMachineNode(
  5552. LIOpcode, dl, MVT::i32,
  5553. CurDAG->getTargetConstant(1, dl, MVT::i8)),
  5554. 0),
  5555. N->getOperand(1), SDValue(LoadLow, 1)});
  5556. }
  5557. CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
  5558. transferMemOperands(N, LoadHigh);
  5559. SDNode *Perm =
  5560. CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
  5561. SDValue(LoadHigh, 0), SDValue(Mask, 0));
  5562. CurDAG->SelectNodeTo(N, SplatOp, Type,
  5563. CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
  5564. SDValue(Perm, 0));
  5565. return;
  5566. }
  5567. }
  5568. SelectCode(N);
  5569. }
  5570. // If the target supports the cmpb instruction, do the idiom recognition here.
  5571. // We don't do this as a DAG combine because we don't want to do it as nodes
  5572. // are being combined (because we might miss part of the eventual idiom). We
  5573. // don't want to do it during instruction selection because we want to reuse
  5574. // the logic for lowering the masking operations already part of the
  5575. // instruction selector.
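// Descriptive note: the idiom recognized below is an OR tree of per-byte
// SELECT_CC nodes, each producing either 0xFF << (8*b) or an alternative
// constant depending on whether byte b of two values is equal; if enough
// bytes are covered, this collapses to a single cmpb plus masking when
// needed.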
  5576. SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
  5577. SDLoc dl(N);
  5578. assert(N->getOpcode() == ISD::OR &&
  5579. "Only OR nodes are supported for CMPB");
  5580. SDValue Res;
  5581. if (!Subtarget->hasCMPB())
  5582. return Res;
  5583. if (N->getValueType(0) != MVT::i32 &&
  5584. N->getValueType(0) != MVT::i64)
  5585. return Res;
  5586. EVT VT = N->getValueType(0);
  5587. SDValue RHS, LHS;
  5588. bool BytesFound[8] = {false, false, false, false, false, false, false, false};
  5589. uint64_t Mask = 0, Alt = 0;
  5590. auto IsByteSelectCC = [this](SDValue O, unsigned &b,
  5591. uint64_t &Mask, uint64_t &Alt,
  5592. SDValue &LHS, SDValue &RHS) {
  5593. if (O.getOpcode() != ISD::SELECT_CC)
  5594. return false;
  5595. ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
  5596. if (!isa<ConstantSDNode>(O.getOperand(2)) ||
  5597. !isa<ConstantSDNode>(O.getOperand(3)))
      return false;

    uint64_t PM = O.getConstantOperandVal(2);
    uint64_t PAlt = O.getConstantOperandVal(3);
    for (b = 0; b < 8; ++b) {
      uint64_t Mask = UINT64_C(0xFF) << (8*b);
      if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
        break;
    }

    if (b == 8)
      return false;
    Mask |= PM;
    Alt |= PAlt;

    if (!isa<ConstantSDNode>(O.getOperand(1)) ||
        O.getConstantOperandVal(1) != 0) {
      SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
      if (Op0.getOpcode() == ISD::TRUNCATE)
        Op0 = Op0.getOperand(0);
      if (Op1.getOpcode() == ISD::TRUNCATE)
        Op1 = Op1.getOperand(0);

      if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
          Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
          isa<ConstantSDNode>(Op0.getOperand(1))) {
        unsigned Bits = Op0.getValueSizeInBits();
        if (b != Bits/8-1)
          return false;
        if (Op0.getConstantOperandVal(1) != Bits-8)
          return false;

        LHS = Op0.getOperand(0);
        RHS = Op1.getOperand(0);
        return true;
      }

      // When we have small integers (i16 to be specific), the form present
      // post-legalization uses SETULT in the SELECT_CC for the
      // higher-order byte, depending on the fact that the
      // even-higher-order bytes are known to all be zero, for example:
      //   select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
      // (so when the second byte is the same, because all higher-order
      // bits from bytes 3 and 4 are known to be zero, the result of the
      // xor can be at most 255)
      if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
          isa<ConstantSDNode>(O.getOperand(1))) {
        uint64_t ULim = O.getConstantOperandVal(1);
        if (ULim != (UINT64_C(1) << b*8))
          return false;

        // Now we need to make sure that the upper bytes are known to be
        // zero.
        unsigned Bits = Op0.getValueSizeInBits();
        if (!CurDAG->MaskedValueIsZero(
                Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
          return false;

        LHS = Op0.getOperand(0);
        RHS = Op0.getOperand(1);
        return true;
      }

      return false;
    }

    if (CC != ISD::SETEQ)
      return false;

    SDValue Op = O.getOperand(0);
    if (Op.getOpcode() == ISD::AND) {
      if (!isa<ConstantSDNode>(Op.getOperand(1)))
        return false;
      if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
        return false;

      SDValue XOR = Op.getOperand(0);
      if (XOR.getOpcode() == ISD::TRUNCATE)
        XOR = XOR.getOperand(0);
      if (XOR.getOpcode() != ISD::XOR)
        return false;

      LHS = XOR.getOperand(0);
      RHS = XOR.getOperand(1);
      return true;
    } else if (Op.getOpcode() == ISD::SRL) {
      if (!isa<ConstantSDNode>(Op.getOperand(1)))
        return false;
      unsigned Bits = Op.getValueSizeInBits();
      if (b != Bits/8-1)
        return false;
      if (Op.getConstantOperandVal(1) != Bits-8)
        return false;

      SDValue XOR = Op.getOperand(0);
      if (XOR.getOpcode() == ISD::TRUNCATE)
        XOR = XOR.getOperand(0);
      if (XOR.getOpcode() != ISD::XOR)
        return false;

      LHS = XOR.getOperand(0);
      RHS = XOR.getOperand(1);
      return true;
    }

    return false;
  };

  SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
  while (!Queue.empty()) {
    SDValue V = Queue.pop_back_val();

    for (const SDValue &O : V.getNode()->ops()) {
      unsigned b = 0;
      uint64_t M = 0, A = 0;
      SDValue OLHS, ORHS;
      if (O.getOpcode() == ISD::OR) {
        Queue.push_back(O);
      } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
        if (!LHS) {
          LHS = OLHS;
          RHS = ORHS;
          BytesFound[b] = true;
          Mask |= M;
          Alt |= A;
        } else if ((LHS == ORHS && RHS == OLHS) ||
                   (RHS == ORHS && LHS == OLHS)) {
          BytesFound[b] = true;
          Mask |= M;
          Alt |= A;
        } else {
          return Res;
        }
      } else {
        return Res;
      }
    }
  }

  unsigned LastB = 0, BCnt = 0;
  for (unsigned i = 0; i < 8; ++i)
    if (BytesFound[LastB]) {
      ++BCnt;
      LastB = i;
    }

  if (!LastB || BCnt < 2)
    return Res;
  // Because we'll be zero-extending the output anyway if we don't have a
  // specific value for each input byte (via the Mask), we can 'anyext' the
  // inputs.
  if (LHS.getValueType() != VT) {
    LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
    RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
  }

  Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);

  bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
  if (NonTrivialMask && !Alt) {
    // Res = Mask & CMPB
    Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
        CurDAG->getConstant(Mask, dl, VT));
  } else if (Alt) {
    // Res = (CMPB & Mask) | (~CMPB & Alt)
    // Which, as suggested here:
    //   https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
    // can be written as:
    //   Res = Alt ^ ((Alt ^ Mask) & CMPB)
    // useful because the (Alt ^ Mask) can be pre-computed.
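    // Checking the identity byte by byte: where a CMPB byte is 0xFF the
    // result byte is Alt ^ (Alt ^ Mask) == Mask, and where a CMPB byte is
    // 0x00 the result byte is just Alt, which is exactly the masked merge
    // described above.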
    Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
        CurDAG->getConstant(Mask ^ Alt, dl, VT));
    Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
        CurDAG->getConstant(Alt, dl, VT));
  }

  return Res;
}

// When CR bit registers are enabled, an extension of an i1 variable to an i32
// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
// involves constant materialization of a 0 or a 1 or both. If the result of
// the extension is then operated upon by some operator that can be constant
// folded with a constant 0 or 1, and that constant can be materialized using
// only one instruction (like a zero or one), then we should fold in those
// operations with the select.
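// For example, with (add (zext i1 %c), 7), the zext is lowered as a select of
// 1 or 0 on %c; folding the add gives a select of 8 or 7, and since both
// constants still fit in a signed 16-bit immediate, each arm remains a single
// load-immediate.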
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
  if (!Subtarget->useCRBits())
    return;

  if (N->getOpcode() != ISD::ZERO_EXTEND &&
      N->getOpcode() != ISD::SIGN_EXTEND &&
      N->getOpcode() != ISD::ANY_EXTEND)
    return;

  if (N->getOperand(0).getValueType() != MVT::i1)
    return;

  if (!N->hasOneUse())
    return;

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Cond = N->getOperand(0);
  SDValue ConstTrue =
      CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
  SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);

  do {
    SDNode *User = *N->use_begin();
    if (User->getNumOperands() != 2)
      break;

    auto TryFold = [this, N, User, dl](SDValue Val) {
      SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
      SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
      SDValue O1 = UserO1.getNode() == N ? Val : UserO1;

      return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
          User->getValueType(0), {O0, O1});
    };

    // FIXME: When the semantics of the interaction between select and undef
    // are clearly defined, it may turn out to be unnecessary to break here.
    SDValue TrueRes = TryFold(ConstTrue);
    if (!TrueRes || TrueRes.isUndef())
      break;
    SDValue FalseRes = TryFold(ConstFalse);
    if (!FalseRes || FalseRes.isUndef())
      break;

    // For us to materialize these using one instruction, we must be able to
    // represent them as signed 16-bit integers.
    uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
             False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
    if (!isInt<16>(True) || !isInt<16>(False))
      break;

    // We can replace User with a new SELECT node, and try again to see if we
    // can fold the select with its user.
    Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
    N = User;
    ConstTrue = TrueRes;
    ConstFalse = FalseRes;
  } while (N->hasOneUse());
}

void PPCDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Res;
    switch (N->getOpcode()) {
    default: break;
    case ISD::OR:
      Res = combineToCMPB(N);
      break;
    }

    if (!Res)
      foldBoolExts(Res, N);

    if (Res) {
      LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Res.getNode()->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
  // Skip peepholes at -O0.
  if (TM.getOptLevel() == CodeGenOpt::None)
    return;

  PeepholePPC64();
  PeepholeCROps();
  PeepholePPC64ZExt();
}

// Check if all users of this node will become isel where the second operand
// is the constant zero. If this is so, and if we can negate the condition,
// then we can flip the true and false operands. This will allow the zero to
// be folded with the isel so that we don't need to materialize a register
// containing zero.
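// Roughly, if every user is (SELECT_I4 %cond, %val, (LI 0)), then after the
// defining CR operation is replaced by its complement and the select operands
// are swapped, each user becomes (SELECT_I4 %ncond, (LI 0), %val); with the
// zero in the true position it can be encoded in the isel itself (r0 reads as
// a literal zero there), so no register holding zero is needed.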
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
  for (const SDNode *User : N->uses()) {
    if (!User->isMachineOpcode())
      return false;
    if (User->getMachineOpcode() != PPC::SELECT_I4 &&
        User->getMachineOpcode() != PPC::SELECT_I8)
      return false;

    SDNode *Op1 = User->getOperand(1).getNode();
    SDNode *Op2 = User->getOperand(2).getNode();
    // If we have a degenerate select with two equal operands, swapping will
    // not do anything, and we may run into an infinite loop.
    if (Op1 == Op2)
      return false;

    if (!Op2->isMachineOpcode())
      return false;

    if (Op2->getMachineOpcode() != PPC::LI &&
        Op2->getMachineOpcode() != PPC::LI8)
      return false;

    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
    if (!C)
      return false;

    if (!C->isZero())
      return false;
  }

  return true;
}

void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
  SmallVector<SDNode *, 4> ToReplace;
  for (SDNode *User : N->uses()) {
    assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
            User->getMachineOpcode() == PPC::SELECT_I8) &&
           "Must have all select users");
    ToReplace.push_back(User);
  }

  for (SDNode *User : ToReplace) {
    SDNode *ResNode =
        CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
            User->getValueType(0), User->getOperand(0),
            User->getOperand(2),
            User->getOperand(1));

    LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
    LLVM_DEBUG(User->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(ResNode->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(User, ResNode);
  }
}

void PPCDAGToDAGISel::PeepholeCROps() {
  bool IsModified;
  do {
    IsModified = false;
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode || MachineNode->use_empty())
        continue;
      SDNode *ResNode = MachineNode;

      bool Op1Set = false, Op1Unset = false,
           Op1Not = false,
           Op2Set = false, Op2Unset = false,
           Op2Not = false;

      unsigned Opcode = MachineNode->getMachineOpcode();
      switch (Opcode) {
      default: break;
      case PPC::CRAND:
      case PPC::CRNAND:
      case PPC::CROR:
      case PPC::CRXOR:
      case PPC::CRNOR:
      case PPC::CREQV:
      case PPC::CRANDC:
      case PPC::CRORC: {
        SDValue Op = MachineNode->getOperand(1);
        if (Op.isMachineOpcode()) {
          if (Op.getMachineOpcode() == PPC::CRSET)
            Op2Set = true;
          else if (Op.getMachineOpcode() == PPC::CRUNSET)
            Op2Unset = true;
          else if ((Op.getMachineOpcode() == PPC::CRNOR &&
                    Op.getOperand(0) == Op.getOperand(1)) ||
                   Op.getMachineOpcode() == PPC::CRNOT)
            Op2Not = true;
        }
        [[fallthrough]];
      }
      case PPC::BC:
      case PPC::BCn:
      case PPC::SELECT_I4:
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
      case PPC::SELECT_VSFRC:
      case PPC::SELECT_VSSRC:
      case PPC::SELECT_VSRC: {
        SDValue Op = MachineNode->getOperand(0);
        if (Op.isMachineOpcode()) {
          if (Op.getMachineOpcode() == PPC::CRSET)
            Op1Set = true;
          else if (Op.getMachineOpcode() == PPC::CRUNSET)
            Op1Unset = true;
          else if ((Op.getMachineOpcode() == PPC::CRNOR &&
                    Op.getOperand(0) == Op.getOperand(1)) ||
                   Op.getMachineOpcode() == PPC::CRNOT)
            Op1Not = true;
        }
        }
        break;
      }

      bool SelectSwap = false;
      switch (Opcode) {
      default: break;
      case PPC::CRAND:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // x & x = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Set)
          // 1 & y = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Set)
          // x & 1 = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset || Op2Unset)
          // x & 0 = 0 & y = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
              MVT::i1);
        else if (Op1Not)
          // ~x & y = andc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1),
              MachineNode->getOperand(0).
                  getOperand(0));
        else if (Op2Not)
          // x & ~y = andc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1).
                  getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRNAND:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // nand(x, x) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(0));
        else if (Op1Set)
          // nand(1, y) -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1),
              MachineNode->getOperand(1));
        else if (Op2Set)
          // nand(x, 1) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(0));
        else if (Op1Unset || Op2Unset)
          // nand(x, 0) = nand(0, y) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
              MVT::i1);
        else if (Op1Not)
          // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0).
                  getOperand(0),
              MachineNode->getOperand(1));
        else if (Op2Not)
          // nand(x, ~y) = ~x | y = orc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1).
                  getOperand(0),
              MachineNode->getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CROR:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // x | x = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Set || Op2Set)
          // x | 1 = 1 | y = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
              MVT::i1);
        else if (Op1Unset)
          // 0 | y = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Unset)
          // x | 0 = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // ~x | y = orc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1),
              MachineNode->getOperand(0).
                  getOperand(0));
        else if (Op2Not)
          // x | ~y = orc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1).
                  getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRXOR:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // xor(x, x) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
              MVT::i1);
        else if (Op1Set)
          // xor(1, y) -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1),
              MachineNode->getOperand(1));
        else if (Op2Set)
          // xor(x, 1) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(0));
        else if (Op1Unset)
          // xor(0, y) = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Unset)
          // xor(x, 0) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // xor(~x, y) = eqv(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0).
                  getOperand(0),
              MachineNode->getOperand(1));
        else if (Op2Not)
          // xor(x, ~y) = eqv(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1).
                  getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRNOR:
        if (Op1Set || Op2Set)
          // nor(1, y) -> 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
              MVT::i1);
        else if (Op1Unset)
          // nor(0, y) = ~y -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1),
              MachineNode->getOperand(1));
        else if (Op2Unset)
          // nor(x, 0) = ~x
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(0));
        else if (Op1Not)
          // nor(~x, y) = andc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0).
                  getOperand(0),
              MachineNode->getOperand(1));
        else if (Op2Not)
          // nor(x, ~y) = andc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1).
                  getOperand(0),
              MachineNode->getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CREQV:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // eqv(x, x) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
              MVT::i1);
        else if (Op1Set)
          // eqv(1, y) = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Set)
          // eqv(x, 1) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset)
          // eqv(0, y) = ~y -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1),
              MachineNode->getOperand(1));
        else if (Op2Unset)
          // eqv(x, 0) = ~x
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(0));
        else if (Op1Not)
          // eqv(~x, y) = xor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0).
                  getOperand(0),
              MachineNode->getOperand(1));
        else if (Op2Not)
          // eqv(x, ~y) = xor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1).
                  getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRANDC:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // andc(x, x) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
              MVT::i1);
        else if (Op1Set)
          // andc(1, y) = ~y
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1),
              MachineNode->getOperand(1));
        else if (Op1Unset || Op2Set)
          // andc(0, y) = andc(x, 1) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
              MVT::i1);
        else if (Op2Unset)
          // andc(x, 0) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // andc(~x, y) = ~(x | y) = nor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0).
                  getOperand(0),
              MachineNode->getOperand(1));
        else if (Op2Not)
          // andc(x, ~y) = x & y
          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1).
                  getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1),
              MachineNode->getOperand(0));
          SelectSwap = true;
        }
        break;
      case PPC::CRORC:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // orc(x, x) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
              MVT::i1);
        else if (Op1Set || Op2Unset)
          // orc(1, y) = orc(x, 0) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
              MVT::i1);
        else if (Op2Set)
          // orc(x, 1) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset)
          // orc(0, y) = ~y
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1),
              MachineNode->getOperand(1));
        else if (Op1Not)
          // orc(~x, y) = ~(x & y) = nand(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0).
                  getOperand(0),
              MachineNode->getOperand(1));
        else if (Op2Not)
          // orc(x, ~y) = x | y
          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(0),
              MachineNode->getOperand(1).
                  getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
              MVT::i1, MachineNode->getOperand(1),
              MachineNode->getOperand(0));
          SelectSwap = true;
        }
        break;
      case PPC::SELECT_I4:
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
      case PPC::SELECT_VSFRC:
      case PPC::SELECT_VSSRC:
      case PPC::SELECT_VSRC:
        if (Op1Set)
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op1Unset)
          ResNode = MachineNode->getOperand(2).getNode();
        else if (Op1Not)
          ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
              SDLoc(MachineNode),
              MachineNode->getValueType(0),
              MachineNode->getOperand(0).
                  getOperand(0),
              MachineNode->getOperand(2),
              MachineNode->getOperand(1));
        break;
      case PPC::BC:
      case PPC::BCn:
        if (Op1Not)
          ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
                  PPC::BC,
              SDLoc(MachineNode),
              MVT::Other,
              MachineNode->getOperand(0).
                  getOperand(0),
              MachineNode->getOperand(1),
              MachineNode->getOperand(2));
        // FIXME: Handle Op1Set, Op1Unset here too.
        break;
      }

      // If we're inverting this node because it is used only by selects that
      // we'd like to swap, then swap the selects before the node replacement.
      if (SelectSwap)
        SwapAllSelectUsers(MachineNode);

      if (ResNode != MachineNode) {
        LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
        LLVM_DEBUG(MachineNode->dump(CurDAG));
        LLVM_DEBUG(dbgs() << "\nNew: ");
        LLVM_DEBUG(ResNode->dump(CurDAG));
        LLVM_DEBUG(dbgs() << "\n");

        ReplaceUses(MachineNode, ResNode);
        IsModified = true;
      }
    }
    if (IsModified)
      CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

// Gather the set of 32-bit operations that are known to have their
// higher-order 32 bits zero, where ToPromote contains all such operations.
static bool PeepholePPC64ZExtGather(SDValue Op32,
    SmallPtrSetImpl<SDNode *> &ToPromote) {
  if (!Op32.isMachineOpcode())
    return false;
  // First, check for the "frontier" instructions (those that will clear the
  // higher-order 32 bits).
  // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
  // around. If it does not, then these instructions will clear the
  // higher-order bits.
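  // For example, MB = 16, ME = 31 (MB <= ME) is a contiguous mask within the
  // low word, so the upper 32 bits of the result are zero; with MB > ME the
  // mask wraps around and can leave upper bits set, so it is not safe.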
  if ((Op32.getMachineOpcode() == PPC::RLWINM ||
       Op32.getMachineOpcode() == PPC::RLWNM) &&
      Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // SLW and SRW always clear the higher-order bits.
  if (Op32.getMachineOpcode() == PPC::SLW ||
      Op32.getMachineOpcode() == PPC::SRW) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // For LI and LIS, we need the immediate to be positive (so that it is not
  // sign extended).
  if (Op32.getMachineOpcode() == PPC::LI ||
      Op32.getMachineOpcode() == PPC::LIS) {
    if (!isUInt<15>(Op32.getConstantOperandVal(0)))
      return false;

    ToPromote.insert(Op32.getNode());
    return true;
  }

  // LHBRX and LWBRX always clear the higher-order bits.
  if (Op32.getMachineOpcode() == PPC::LHBRX ||
      Op32.getMachineOpcode() == PPC::LWBRX) {
    ToPromote.insert(Op32.getNode());
    return true;
  }
  // CNT[LT]ZW always produces a value in [0, 32], and so the result is zero
  // extended.
  if (Op32.getMachineOpcode() == PPC::CNTLZW ||
      Op32.getMachineOpcode() == PPC::CNTTZW) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // Next, check for those instructions we can look through.

  // Assuming the mask does not wrap around, then the higher-order bits are
  // taken directly from the first operand.
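  // (For example, RLWIMI only replaces the bits selected by its MB/ME mask;
  // with MB <= ME that mask lies within the low word, so the upper 32 bits of
  // the result come from the first, tied operand, which is why only that
  // operand needs to be gathered.)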
  if (Op32.getMachineOpcode() == PPC::RLWIMI &&
      Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // For OR, the higher-order bits are zero if that is true for both operands.
  // For SELECT_I4, the same is true (but the relevant operand numbers are
  // shifted by 1).
  if (Op32.getMachineOpcode() == PPC::OR ||
      Op32.getMachineOpcode() == PPC::SELECT_I4) {
    unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
      return false;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // For ORI and ORIS, we need the higher-order bits of the first operand to be
  // zero, and also for the constant to be positive (so that it is not sign
  // extended).
  if (Op32.getMachineOpcode() == PPC::ORI ||
      Op32.getMachineOpcode() == PPC::ORIS) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;
    if (!isUInt<15>(Op32.getConstantOperandVal(1)))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // The higher-order bits of AND are zero if that is true for at least one of
  // the operands.
  if (Op32.getMachineOpcode() == PPC::AND) {
    SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;

    bool Op0OK =
        PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK =
        PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    if (Op1OK)
      ToPromote.insert(ToPromote2.begin(), ToPromote2.end());

    return true;
  }

  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
  // of the first operand, or if the second operand is positive (so that it is
  // not sign extended).
  if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
      Op32.getMachineOpcode() == PPC::ANDIS_rec) {
    SmallPtrSet<SDNode *, 16> ToPromote1;

    bool Op0OK =
        PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    return true;
  }

  return false;
}

void PPCDAGToDAGISel::PeepholePPC64ZExt() {
  if (!Subtarget->isPPC64())
    return;

  // When we zero-extend from i32 to i64, we use a pattern like this:
  // def : Pat<(i64 (zext i32:$in)),
  //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
  //                   0, 32)>;
  // There are several 32-bit shift/rotate instructions, however, that will
  // clear the higher-order bits of their output, rendering the RLDICL
  // unnecessary. When that happens, we remove it here, and redefine the
  // relevant 32-bit operation to be a 64-bit operation.
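  // For example, (RLDICL (INSERT_SUBREG (IMPLICIT_DEF), (SRW ...), sub_32),
  // 0, 32) can simply become (SRW8 ...): SRW already leaves the upper 32 bits
  // zero, so the clearing rotate is redundant once SRW is rewritten as SRW8.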
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (N->getMachineOpcode() != PPC::RLDICL)
      continue;

    if (N->getConstantOperandVal(1) != 0 ||
        N->getConstantOperandVal(2) != 32)
      continue;

    SDValue ISR = N->getOperand(0);
    if (!ISR.isMachineOpcode() ||
        ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
      continue;

    if (!ISR.hasOneUse())
      continue;

    if (ISR.getConstantOperandVal(2) != PPC::sub_32)
      continue;

    SDValue IDef = ISR.getOperand(0);
    if (!IDef.isMachineOpcode() ||
        IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
      continue;

    // We now know that we're looking at a canonical i32 -> i64 zext. See if we
    // can get rid of it.

    SDValue Op32 = ISR->getOperand(1);
    if (!Op32.isMachineOpcode())
      continue;

    // There are some 32-bit instructions that always clear the high-order 32
    // bits, there are also some instructions (like AND) that we can look
    // through.
    SmallPtrSet<SDNode *, 16> ToPromote;
    if (!PeepholePPC64ZExtGather(Op32, ToPromote))
      continue;

    // If the ToPromote set contains nodes that have uses outside of the set
    // (except for the original INSERT_SUBREG), then abort the transformation.
    bool OutsideUse = false;
    for (SDNode *PN : ToPromote) {
      for (SDNode *UN : PN->uses()) {
        if (!ToPromote.count(UN) && UN != ISR.getNode()) {
          OutsideUse = true;
          break;
        }
      }

      if (OutsideUse)
        break;
    }
    if (OutsideUse)
      continue;

    MadeChange = true;
    // We now know that this zero extension can be removed by promoting the
    // nodes in ToPromote to 64-bit operations, where for operations in the
    // frontier of the set, we need to insert INSERT_SUBREGs for their
    // operands.
    for (SDNode *PN : ToPromote) {
      unsigned NewOpcode;
      switch (PN->getMachineOpcode()) {
      default:
        llvm_unreachable("Don't know the 64-bit variant of this instruction");
      case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
      case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
      case PPC::SLW:       NewOpcode = PPC::SLW8; break;
      case PPC::SRW:       NewOpcode = PPC::SRW8; break;
      case PPC::LI:        NewOpcode = PPC::LI8; break;
      case PPC::LIS:       NewOpcode = PPC::LIS8; break;
      case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
      case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
      case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
      case PPC::CNTTZW:    NewOpcode = PPC::CNTTZW8; break;
      case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
      case PPC::OR:        NewOpcode = PPC::OR8; break;
      case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
      case PPC::ORI:       NewOpcode = PPC::ORI8; break;
      case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
      case PPC::AND:       NewOpcode = PPC::AND8; break;
      case PPC::ANDI_rec:
        NewOpcode = PPC::ANDI8_rec;
        break;
      case PPC::ANDIS_rec:
        NewOpcode = PPC::ANDIS8_rec;
        break;
      }

      // Note: During the replacement process, the nodes will be in an
      // inconsistent state (some instructions will have operands with values
      // of the wrong type). Once done, however, everything should be right
      // again.

      SmallVector<SDValue, 4> Ops;
      for (const SDValue &V : PN->ops()) {
        if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
            !isa<ConstantSDNode>(V)) {
          SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
          SDNode *ReplOp =
              CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
                  ISR.getNode()->getVTList(), ReplOpOps);
          Ops.push_back(SDValue(ReplOp, 0));
        } else {
          Ops.push_back(V);
        }
      }

      // Because all to-be-promoted nodes only have users that are other
      // promoted nodes (or the original INSERT_SUBREG), we can safely replace
      // the i32 result value type with i64.

      SmallVector<EVT, 2> NewVTs;
      SDVTList VTs = PN->getVTList();
      for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
        if (VTs.VTs[i] == MVT::i32)
          NewVTs.push_back(MVT::i64);
        else
          NewVTs.push_back(VTs.VTs[i]);

      LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
      LLVM_DEBUG(PN->dump(CurDAG));

      CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);

      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(PN->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");
    }

    // Now we replace the original zero extend and its associated INSERT_SUBREG
    // with the value feeding the INSERT_SUBREG (which has now been promoted to
    // return an i64).

    LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(N, Op32.getNode());
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

static bool isVSXSwap(SDValue N) {
  if (!N->isMachineOpcode())
    return false;
  unsigned Opc = N->getMachineOpcode();

  // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the
  // immediate operand is 2.
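  // With both inputs the same register, XXPERMDI with DM = 2 selects
  // doubleword 1 followed by doubleword 0, and XXSLDWI with SHW = 2 rotates
  // the vector by two words; either way the two doublewords are swapped,
  // which is the canonical VSX swap.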
  if (Opc == PPC::XXPERMDIs) {
    return isa<ConstantSDNode>(N->getOperand(1)) &&
        N->getConstantOperandVal(1) == 2;
  } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) {
    return N->getOperand(0) == N->getOperand(1) &&
        isa<ConstantSDNode>(N->getOperand(2)) &&
        N->getConstantOperandVal(2) == 2;
  }

  return false;
}

// TODO: Make this complete and replace with a table-gen bit.
static bool isLaneInsensitive(SDValue N) {
  if (!N->isMachineOpcode())
    return false;

  unsigned Opc = N->getMachineOpcode();
  switch (Opc) {
  default:
    return false;
  case PPC::VAVGSB:
  case PPC::VAVGUB:
  case PPC::VAVGSH:
  case PPC::VAVGUH:
  case PPC::VAVGSW:
  case PPC::VAVGUW:
  case PPC::VMAXFP:
  case PPC::VMAXSB:
  case PPC::VMAXUB:
  case PPC::VMAXSH:
  case PPC::VMAXUH:
  case PPC::VMAXSW:
  case PPC::VMAXUW:
  case PPC::VMINFP:
  case PPC::VMINSB:
  case PPC::VMINUB:
  case PPC::VMINSH:
  case PPC::VMINUH:
  case PPC::VMINSW:
  case PPC::VMINUW:
  case PPC::VADDFP:
  case PPC::VADDUBM:
  case PPC::VADDUHM:
  case PPC::VADDUWM:
  case PPC::VSUBFP:
  case PPC::VSUBUBM:
  case PPC::VSUBUHM:
  case PPC::VSUBUWM:
  case PPC::VAND:
  case PPC::VANDC:
  case PPC::VOR:
  case PPC::VORC:
  case PPC::VXOR:
  case PPC::VNOR:
  case PPC::VMULUWM:
    return true;
  }
}

// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
// lane-insensitive.
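// For example, (xxswap (vadduwm (xxswap %a), (xxswap %b))) computes the same
// lanes as (vadduwm %a, %b), just in swapped positions that the outer xxswap
// restores, so all three swaps can be dropped.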
static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
  // Our desired xxswap might be the source of a COPY_TO_REGCLASS.
  // TODO: Can we put this in a common method for DAG?
  auto SkipRCCopy = [](SDValue V) {
    while (V->isMachineOpcode() &&
           V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
      // All values in the chain should have single use.
      if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))
        return SDValue();
      V = V->getOperand(0);
    }
    return V.hasOneUse() ? V : SDValue();
  };

  SDValue VecOp = SkipRCCopy(N->getOperand(0));
  if (!VecOp || !isLaneInsensitive(VecOp))
    return;

  SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
          RHS = SkipRCCopy(VecOp.getOperand(1));
  if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
    return;
  // These swaps may still have chain uses here; count on dead code elimination
  // in following passes to remove them.
  DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));
  DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));
  DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
}

void PPCDAGToDAGISel::PeepholePPC64() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (isVSXSwap(SDValue(N, 0)))
      reduceVSXSwap(N, CurDAG);

    unsigned FirstOp;
    unsigned StorageOpcode = N->getMachineOpcode();
    bool RequiresMod4Offset = false;

    switch (StorageOpcode) {
    default: continue;

    case PPC::LWA:
    case PPC::LD:
    case PPC::DFLOADf64:
    case PPC::DFLOADf32:
      RequiresMod4Offset = true;
      [[fallthrough]];
    case PPC::LBZ:
    case PPC::LBZ8:
    case PPC::LFD:
    case PPC::LFS:
    case PPC::LHA:
    case PPC::LHA8:
    case PPC::LHZ:
    case PPC::LHZ8:
    case PPC::LWZ:
    case PPC::LWZ8:
      FirstOp = 0;
      break;

    case PPC::STD:
    case PPC::DFSTOREf64:
    case PPC::DFSTOREf32:
      RequiresMod4Offset = true;
      [[fallthrough]];
    case PPC::STB:
    case PPC::STB8:
    case PPC::STFD:
    case PPC::STFS:
    case PPC::STH:
    case PPC::STH8:
    case PPC::STW:
    case PPC::STW8:
      FirstOp = 1;
      break;
    }
    // If this is a load or store with a zero offset, or an offset within the
    // alignment, we may be able to fold an add-immediate into the memory
    // operation. The check against alignment is below, as it can't occur
    // until we check the arguments to N.
    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
      continue;

    SDValue Base = N->getOperand(FirstOp + 1);
    if (!Base.isMachineOpcode())
      continue;

    unsigned Flags = 0;
    bool ReplaceFlags = true;

    // When the feeding operation is an add-immediate of some sort,
    // determine whether we need to add relocation information to the
    // target flags on the immediate operand when we fold it into the
    // load instruction.
    //
    // For something like ADDItocL, the relocation information is
    // inferred from the opcode; when we process it in the AsmPrinter,
    // we add the necessary relocation there. A load, though, can receive
    // relocation from various flavors of ADDIxxx, so we need to carry
    // the relocation information in the target flags.
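    // Roughly, folding a plain addi just moves the constant:
    //   (LWZ 0, (ADDI %base, 16)) -> (LWZ 16, %base)
    // while folding an ADDItocL attaches a @toc@l flag to the folded operand:
    //   (LD 0, (ADDItocL %ha, @g)) -> (LD @g[@toc@l], %ha)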
    switch (Base.getMachineOpcode()) {
    default: continue;

    case PPC::ADDI8:
    case PPC::ADDI:
      // In some cases (such as TLS) the relocation information
      // is already in place on the operand, so copying the operand
      // is sufficient.
      ReplaceFlags = false;
      // For these cases, the immediate may not be divisible by 4, in
      // which case the fold is illegal for DS-form instructions.  (The
      // other cases provide aligned addresses and are always safe.)
      if (RequiresMod4Offset &&
          (!isa<ConstantSDNode>(Base.getOperand(1)) ||
           Base.getConstantOperandVal(1) % 4 != 0))
        continue;
      break;
    case PPC::ADDIdtprelL:
      Flags = PPCII::MO_DTPREL_LO;
      break;
    case PPC::ADDItlsldL:
      Flags = PPCII::MO_TLSLD_LO;
      break;
    case PPC::ADDItocL:
      Flags = PPCII::MO_TOC_LO;
      break;
    }

    SDValue ImmOpnd = Base.getOperand(1);

    // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
    // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
    // we might have needed different @ha relocation values for the offset
    // pointers).
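    // For example, if the low 16 bits of the symbol's address were 0x7FF8,
    // adding a displacement of 8 would change the @ha half of the address, so
    // the @ha already materialized by the addis would be wrong; displacements
    // smaller than the known alignment can never cause that carry.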
    int MaxDisplacement = 7;
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
      const GlobalValue *GV = GA->getGlobal();
      Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
      MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
    }

    bool UpdateHBase = false;
    SDValue HBase = Base.getOperand(0);

    int Offset = N->getConstantOperandVal(FirstOp);
    if (ReplaceFlags) {
      if (Offset < 0 || Offset > MaxDisplacement) {
        // If we have an addi(toc@l)/addis(toc@ha) pair, and the addis has
        // only one use, then we can do this for any offset; we just need to
        // also update the offset (i.e. the symbol addend) on the addis.
        if (Base.getMachineOpcode() != PPC::ADDItocL)
          continue;

        if (!HBase.isMachineOpcode() ||
            HBase.getMachineOpcode() != PPC::ADDIStocHA8)
          continue;

        if (!Base.hasOneUse() || !HBase.hasOneUse())
          continue;

        SDValue HImmOpnd = HBase.getOperand(1);
        if (HImmOpnd != ImmOpnd)
          continue;

        UpdateHBase = true;
      }
    } else {
      // If we're directly folding the addend from an addi instruction, then:
      //  1. In general, the offset on the memory access must be zero.
      //  2. If the addend is a constant, then it can be combined with a
      //     non-zero offset, but only if the result meets the encoding
      //     requirements.
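      // For example, (LD 4, (ADDI8 %x, 8)) can become (LD 12, %x): the two
      // constants combine to 12, which is a multiple of 4 and fits in a
      // signed 16-bit field, so the DS-form encoding is still valid.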
      if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
        Offset += C->getSExtValue();

        if (RequiresMod4Offset && (Offset % 4) != 0)
          continue;

        if (!isInt<16>(Offset))
          continue;

        ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
            ImmOpnd.getValueType());
      } else if (Offset != 0) {
        continue;
      }
    }

    // We found an opportunity.  Reverse the operands from the add
    // immediate and substitute them into the load or store.  If
    // needed, update the target flags for the immediate operand to
    // reflect the necessary relocation information.
    LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
    LLVM_DEBUG(Base->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nN: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    // If the relocation information isn't already present on the
    // immediate operand, add it now.
    if (ReplaceFlags) {
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
        SDLoc dl(GA);
        const GlobalValue *GV = GA->getGlobal();
        Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
        // We can't perform this optimization for data whose alignment
        // is insufficient for the instruction encoding.
        if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
          LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
          continue;
        }
        ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
      } else if (ConstantPoolSDNode *CP =
                     dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
        const Constant *C = CP->getConstVal();
        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
            Offset, Flags);
      }
    }

    if (FirstOp == 1) // Store
      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
          Base.getOperand(0), N->getOperand(3));
    else // Load
      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
          N->getOperand(2));

    if (UpdateHBase)
      (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
          ImmOpnd);

    // The add-immediate may now be dead, in which case remove it.
    if (Base.getNode()->use_empty())
      CurDAG->RemoveDeadNode(Base.getNode());
  }
}

/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
    CodeGenOpt::Level OptLevel) {
  return new PPCDAGToDAGISel(TM, OptLevel);
}