//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>

using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}
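
// Illustrative example (editor's sketch, not upstream text): if the caller
// promises an extension on its return value, e.g.
//
//   define zeroext i8 @caller() {
//     %v = call i8 @callee()
//     ret i8 %v
//   }
//
// the leftover `zeroext` return attribute survives the removal loop above, so
// the call is conservatively not treated as being in tail call position.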

bool TargetLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}
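
// Illustrative note (editor's sketch): targets consult this when deciding
// whether a tail call may forward an argument in a callee-saved register.
// For instance, on AArch64 a `swiftself` argument lives in x20, which is
// callee-saved; the CopyFromReg/live-in check above accepts the tail call
// only when the value forwarded in x20 is exactly the x20 live-in the caller
// itself received.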

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}
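
// Illustrative example (editor's sketch): for a call such as
//
//   call void @f(ptr byval(%struct.S) %p)
//
// IsByVal is set, IndirectType becomes %struct.S, and because byval carries
// no stack-alignment override here, the alignment falls back to the
// parameter's `align` attribute if present. The assert reflects that byval,
// preallocated, inalloca and sret are mutually exclusive ABI attributes.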

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions, const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(NewOp.getValueType(), CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;
    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
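
// Usage sketch (hypothetical caller, for illustration only): a target
// expanding a 32-bit floating-point remainder through the fmodf libcall
// could write something like:
//
//   TargetLowering::MakeLibCallOptions CallOptions;
//   SDValue Ops[2] = {LHS, RHS};
//   std::pair<SDValue, SDValue> Res =
//       TLI.makeLibCall(DAG, RTLIB::REM_F32, MVT::f32, Ops, CallOptions, dl);
//   // Res.first is the call's result, Res.second the updated chain.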

bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater than
    // or equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
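
// Worked example (illustrative, editor's sketch): copying 15 bytes on a
// target where i64 is legal and getOptimalMemOpType() returns MVT::Other
// yields VT = i64. Without overlap the loop above produces
// MemOps = {i64, i32, i16, i8}; when Op.allowOverlap() holds and misaligned
// accesses are fast, the inner loop instead keeps VTSize = Size, producing
// two i64 operations where the second overlaps the first by one byte.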
  254. /// Soften the operands of a comparison. This code is shared among BR_CC,
  255. /// SELECT_CC, and SETCC handlers.
  256. void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
  257. SDValue &NewLHS, SDValue &NewRHS,
  258. ISD::CondCode &CCCode,
  259. const SDLoc &dl, const SDValue OldLHS,
  260. const SDValue OldRHS) const {
  261. SDValue Chain;
  262. return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
  263. OldRHS, Chain);
  264. }
  265. void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.
  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
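
// Example (illustrative, assuming the default libcall setup, where
// RTLIB::UO_F32 maps to __unordsf2 with compare CC SETNE and RTLIB::OEQ_F32
// maps to __eqsf2 with compare CC SETEQ): softening (setcc f32 %a, %b, setueq)
// picks LC1 = UO_F32 and LC2 = OEQ_F32 and combines the two results as
//   (__unordsf2(a, b) != 0) | (__eqsf2(a, b) == 0)
// For SETONE, ShouldInvertCC flips both predicates and the combiner becomes
// AND instead: (__unordsf2(a, b) == 0) & (__eqsf2(a, b) != 0).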

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}
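
// Example (illustrative): for a dso-local global @g in static code, an
// address like (@g + 16) can fold into a single relocated immediate in the
// addressing mode. Under PIC, or when @g may be preemptible, the access
// first needs a GOT load or a base-register add, so the fold is rejected.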

//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
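
// Worked example (illustrative): given (or X, 0xFF00) where only bits 0-11
// are demanded, the constant has set bits (0xF000) outside the demanded mask,
// so it is not a subset of DemandedBits and gets shrunk to
// DemandedBits & C = 0x0F00, rebuilding the node as (or X, 0x0F00). For XOR
// the fold is skipped when the constant covers all demanded bits, since that
// is the canonical 'not' form.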

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
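
// Worked example (illustrative): on a target where truncating i64->i32 and
// zero-extending i32->i64 are both free, an i64 add whose users only demand
// the low 20 bits is rewritten as
//   (any_extend (add (trunc x), (trunc y)))
// at i32, the smallest power-of-2 width >= 20 bits with free casts.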

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits)
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    // If this simply widens the lowest subvector, see if we can do it earlier.
    // TODO: REMOVE ME - SimplifyMultipleUseDemandedBits shouldn't be creating
    // general nodes like this.
    if (Idx == 0 && Vec.isUndef()) {
      if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
              Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
        return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                           Op.getOperand(0), NewSub, Op.getOperand(2));
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
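
// Example (illustrative): for (and X, 0xFF) where a caller only demands bits
// 0-3, RHSKnown.One covers every demanded bit, so the routine above returns X
// itself without rewriting the DAG; the caller may then substitute X for the
// 'and' in that one use while other users keep the masked value.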

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts,
                                 unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");

  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as an avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op2;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth));
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
                  DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
  return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
                     ResultAVG);
}
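
// Worked example (illustrative): with A and B being v8i8 values zero-extended
// to v8i16,
//   srl(add(zext A, zext B), 1)
// has at least 8 known leading zero bits in each operand, so KnownBits = 8 and
// MinWidth = max(16 - 8, 8) = 8, giving NVT = v8i8. The node becomes
//   zext(avgflooru(A, B) : v8i8) to v8i16
// when AVGFLOORU is legal or custom for v8i8; the add-of-1 forms map to the
// AVGCEILU/AVGCEILS variants instead.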

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, recording the
/// original and replacement nodes in TLO. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = KnownBits::commonBits(Known, KnownVec);

    return false;
  }
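  // Example (illustrative): for (insert_vector_elt V, S, 2) where lane 2 is
  // not demanded, the case above folds the node to V outright; otherwise the
  // demanded bits are pushed into both S and the remaining lanes of V, and
  // the resulting known bits are the intersection of the two.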
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = KnownBits::commonBits(Known, KnownSub);
    if (!!DemandedSrcElts)
      Known = KnownBits::commonBits(Known, KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = KnownBits::commonBits(Known, Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS; here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known &= Known2;
    break;
  }
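  // Note (descriptive): the final Known &= Known2 merges the operand known
  // bits with AND semantics - a result bit is known zero if it is known zero
  // in either operand, and known one only if it is known one in both.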
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
    // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
        Op0->hasOneUse() && Op1->hasOneUse()) {
      // Attempt to match all commutations - m_c_Or would've been useful!
      for (int I = 0; I != 2; ++I) {
        SDValue X = Op.getOperand(I).getOperand(0);
        SDValue C1 = Op.getOperand(I).getOperand(1);
        SDValue Alt = Op.getOperand(1 - I).getOperand(0);
        SDValue C2 = Op.getOperand(1 - I).getOperand(1);
        if (Alt.getOpcode() == ISD::OR) {
          for (int J = 0; J != 2; ++J) {
            if (X == Alt.getOperand(J)) {
              SDValue Y = Alt.getOperand(1 - J);
              if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
                                                               {C1, C2})) {
                SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
                SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
                return TLO.CombineTo(
                    Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
              }
            }
          }
        }
      }
    }

    Known |= Known2;
    break;
  }
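  // Worked example (illustrative) for the masked-or fold above, with i32
  // constants:
  //   (or (and X, 0x0F), (and (or X, Y), 0xF0))
  // folds C12 = 0x0F | 0xF0 = 0xFF and becomes
  //   (or (and X, 0xFF), (and Y, 0xF0))
  // which drops the inner (or X, Y) node.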
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
    if (C) {
      // If one side is a constant, and all of the set bits in the constant are
      // also known set on the other side, turn this into an AND, as we know
      // the bits will be cleared.
      // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
        // We're flipping all demanded bits. Flip the undemanded bits too.
        SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
        return TLO.CombineTo(Op, New);
      }

      unsigned Op0Opcode = Op0.getOpcode();
      if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
        if (ConstantSDNode *ShiftC =
                isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
          // Don't crash on an oversized shift. We cannot guarantee that a
          // bogus shift has been simplified to undef.
          if (ShiftC->getAPIntValue().ult(BitWidth)) {
            uint64_t ShiftAmt = ShiftC->getZExtValue();
            APInt Ones = APInt::getAllOnes(BitWidth);
            Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
                                         : Ones.lshr(ShiftAmt);
            const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
            if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
                TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
              // If the xor constant is a demanded mask, do a 'not' before the
              // shift:
              // xor (X << ShiftC), XorC --> (not X) << ShiftC
              // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
              SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
              return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
                                                       Op0.getOperand(1)));
            }
          }
        }
      }
    }

    // If we can't turn this into a 'not', try to shrink the constant.
    if (!C || !C->isAllOnes())
      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
        return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known ^= Known2;
    break;
  }
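  // Example (illustrative): for i32 (xor (shl X, 3), 0xFFFFFFF8), Ones is
  // (all-ones << 3) = 0xFFFFFFF8, which matches the xor constant on every
  // demanded bit, so (subject to isDesirableToCommuteXorWithShift) the 'not'
  // is commuted below the shift:
  //   xor (shl X, 3), 0xFFFFFFF8 --> shl (not X), 3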
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::VSELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
                             Known, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
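  // Example (illustrative): if only the sign bit of (setcc i32 X, 0, setlt)
  // is demanded and the target's booleans are ZeroOrNegativeOne, the setcc is
  // bypassed entirely and X is used in its place, since X already carries the
  // wanted sign bit.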
  case ISD::SHL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift. We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SRL) {
        if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SHL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SRL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      // Convert (shl (anyext x), c) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
              TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                              TLO.DAG.getConstant(ShAmt, dl, ShTy));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }

        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        // TODO - support non-uniform vector amounts.
        if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
            InnerOp.hasOneUse()) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
            unsigned InnerShAmt = SA2->getZExtValue();
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countTrailingZeros() >= ShAmt) {
              SDValue NewSA =
                  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
            }
          }
        }
      }
  1601. APInt InDemandedMask = DemandedBits.lshr(ShAmt);
  1602. if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
  1603. Depth + 1))
  1604. return true;
  1605. assert(!Known.hasConflict() && "Bits known to be one AND zero?");
  1606. Known.Zero <<= ShAmt;
  1607. Known.One <<= ShAmt;
  1608. // low bits known zero.
  1609. Known.Zero.setLowBits(ShAmt);
  1610. // Attempt to avoid multi-use ops if we don't need anything from them.
  1611. if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
  1612. SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
  1613. Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
  1614. if (DemandedOp0) {
  1615. SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
  1616. return TLO.CombineTo(Op, NewOp);
  1617. }
  1618. }
  1619. // Try shrinking the operation as long as the shift amount will still be
  1620. // in range.
  1621. if ((ShAmt < DemandedBits.getActiveBits()) &&
  1622. ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
  1623. return true;
  1624. } else {
  1625. // This is a variable shift, so we can't shift the demand mask by a known
  1626. // amount. But if we are not demanding high bits, then we are not
  1627. // demanding those bits from the pre-shifted operand either.
  1628. if (unsigned CTLZ = DemandedBits.countLeadingZeros()) {
  1629. APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
  1630. if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
  1631. Depth + 1)) {
  1632. SDNodeFlags Flags = Op.getNode()->getFlags();
  1633. if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
  1634. // Disable the nsw and nuw flags. We can no longer guarantee that we
  1635. // won't wrap after simplification.
  1636. Flags.setNoSignedWrap(false);
  1637. Flags.setNoUnsignedWrap(false);
  1638. Op->setFlags(Flags);
  1639. }
  1640. return true;
  1641. }
  1642. Known.resetAll();
  1643. }
  1644. }
  1645. // If we are only demanding sign bits then we can use the shift source
  1646. // directly.
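    // For instance (a sketch): if x is known to have at least 17 sign bits on
    // i32 and the shift amount is at most 8, then after the shift the top
    // 17 - 8 = 9 bits still equal the sign, so demanding only those top bits
    // makes (shl x, amt) interchangeable with x itself.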
    if (const APInt *MaxSA =
            TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return TLO.CombineTo(Op, Op0);
    }
    break;
  }
  case ISD::SRL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // Try to match AVG patterns.
    if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
                                        DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, AVG);

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift. We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
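      // Mirror of the SHL case above (a sketch): on i32 with C1 = 16 and
      // ShAmt = 8, (srl (shl x, 16), 8) matches (shl x, 8) everywhere except
      // the top 8 bits, so it folds to (shl x, 8) when those are not demanded.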
      if (Op0.getOpcode() == ISD::SHL) {
        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SRL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SHL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      APInt InDemandedMask = (DemandedBits << ShAmt);
      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);
      // High bits known zero.
      Known.Zero.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }
    break;
  }
  case ISD::SRA: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
    if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
        NumHiDemandedBits)
      return TLO.CombineTo(Op, Op0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedBits.isOne())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

    // Try to match AVG patterns.
    if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
                                        DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, AVG);

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      APInt InDemandedMask = (DemandedBits << ShAmt);
      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we
      // also demand the input sign bit.
      if (DemandedBits.countLeadingZeros() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countLeadingZeros() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
      }

      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
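        // (Sketch: on i32, if only bit 5 of the result is demanded, the check
        // just above guarantees it lies in the sign-extended region, so
        // (srl x, 31 - 5) delivers the same bit without the arithmetic
        // shift.)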
        SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDValue Op2 = Op.getOperand(2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
                                 Known, TLO, Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
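      // E.g. (a sketch): fshl(a, b, 8) on i32 is (a << 8) | (b >> 24), so a
      // demanded mask M pulls M.lshr(8) from a and M << 24 from b; the two
      // SimplifyDemandedBits calls below query exactly those masks.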
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;

      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.One |= Known2.One;
      Known.Zero |= Known2.Zero;

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
          !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
            Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0 || DemandedOp1) {
          DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
          DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
                                          DemandedOp1, Op2);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    bool IsROTL = (Op.getOpcode() == ISD::ROTL);

    // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
    if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, Op0);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);
      unsigned RevAmt = BitWidth - Amt;

      // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
      // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
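      // E.g. (a sketch): for rotl(x, 8) on i32, result bit i comes from
      // x bit (i - 8) mod 32, so the demanded mask is rotated right by 8
      // before being pushed into the operand, as done just below.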
      APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;

      // rot*(x, 0) --> x
      if (Amt == 0)
        return TLO.CombineTo(Op, Op0);

      // See if we don't demand either half of the rotated bits.
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
          DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
      }
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
          DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
      }
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case ISD::UMIN: {
    // Check if one arg is always less than (or equal) to the other arg.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umin(Known0, Known1);
    if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
      return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
    if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
      return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
    break;
  }
  case ISD::UMAX: {
    // Check if one arg is always greater than (or equal) to the other arg.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umax(Known0, Known1);
    if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
      return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
    if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
      return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.reverseBits();
    Known.Zero = Known2.Zero.reverseBits();
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = Op.getOperand(0);

    // If the only bits demanded come from one byte of the bswap result,
    // just shift the input byte into position to eliminate the bswap.
    unsigned NLZ = DemandedBits.countLeadingZeros();
    unsigned NTZ = DemandedBits.countTrailingZeros();

    // Round NTZ down to the next byte. If we have 11 trailing zeros, then
    // we need all the bits down to bit 8. Likewise, round NLZ. If we
    // have 14 leading zeros, round to 8.
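    // Worked example (a sketch): on i32, demanding only bits 8-15 of
    // (bswap x) gives NLZ = 16 and NTZ = 8, leaving exactly one byte. That
    // byte is x's bits 16-23, so the bswap is replaced below by (srl x, 8)
    // with NLZ - NTZ = 8.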
    NLZ = alignDown(NLZ, 8);
    NTZ = alignDown(NTZ, 8);
    // If we need exactly one byte, we can do this transformation.
    if (BitWidth - NLZ - NTZ == 8) {
      // Replace this with either a left or right shift to get the byte into
      // the right place.
      unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
      if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
        EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
        unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
        SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
        SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    APInt DemandedSrcBits = DemandedBits.byteSwap();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.byteSwap();
    Known.Zero = Known2.Zero.byteSwap();
    break;
  }
  case ISD::CTPOP: {
    // If only 1 bit is demanded, replace with PARITY as long as we're before
    // op legalization.
    // FIXME: Limit to scalars for now.
    if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
                                               Op.getOperand(0)));

    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
    if (DemandedBits.isSignMask()) {
      unsigned MinSignedBits =
          TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
      bool AlreadySignExtended = ExVTBits >= MinSignedBits;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be
        // getShiftAmountTy for scalar types after legalization.
        SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
                                               getShiftAmountTy(VT, DL));
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(Op, Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));

    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
  case ISD::BUILD_PAIR: {
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    Known = KnownHi.concat(KnownLo);
    break;
  }
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::ZERO_EXTEND: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
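    // E.g. (a sketch): if only the low 8 bits of (zext i8 x to i32) are
    // demanded, the zero-filled top bits are never observed, so a cheaper
    // any_extend (or, in the single-element in-reg case below, a bitcast)
    // produces the same demanded bits.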
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zext(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.zext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::SIGN_EXTEND: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zext(InElts);

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InDemandedBits.setBit(InBits - 1);

    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");

    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative()) {
      unsigned Opc =
          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::ANY_EXTEND: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;

    // If we only need the bottom element then we can just bitcast.
    // TODO: Handle ANY_EXTEND?
    if (IsLE && IsVecInReg && DemandedElts == 1 &&
        VT.getSizeInBits() == SrcVT.getSizeInBits())
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zext(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.anyext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::TRUNCATE: {
    SDValue Src = Op.getOperand(0);

    // Simplify the input, using demanded bit information, and compute the
    // known zero/one bits live out.
    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
    APInt TruncMask = DemandedBits.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    Known = Known.trunc(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));

    // If the input is only used by this truncate, see if we can shrink it
    // based on the known demanded bits.
    switch (Src.getOpcode()) {
    default:
      break;
    case ISD::SRL:
      // Shrink SRL by a constant if none of the high bits shifted in are
      // demanded.
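      // Sketch (illustrative): (i32 trunc (i64 srl x, 8)) can become
      // (i32 srl (i32 trunc x), 8) as long as the top 8 bits of the
      // truncated result are not demanded - those are the only bits that
      // would have pulled data in from x's upper half.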
      if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
        // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
        // undesirable.
        break;

      if (Src.getNode()->hasOneUse()) {
        const APInt *ShAmtC =
            TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
        if (!ShAmtC || ShAmtC->uge(BitWidth))
          break;
        uint64_t ShVal = ShAmtC->getZExtValue();

        APInt HighBits =
            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
        HighBits.lshrInPlace(ShVal);
        HighBits = HighBits.trunc(BitWidth);

        if (!(HighBits & DemandedBits)) {
          // None of the shifted in bits are needed.  Add a truncate of the
          // shift input, then shift it.
          SDValue NewShAmt = TLO.DAG.getConstant(
              ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
          SDValue NewTrunc =
              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
        }
      }
      break;
    }

    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case ISD::AssertZext: {
    // AssertZext demands all of the high bits, plus any of the low bits
    // demanded by its users.
    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
                             TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    Known.Zero |= ~InMask;
    Known.One &= (~Known.Zero);
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Src = Op.getOperand(0);
    SDValue Idx = Op.getOperand(1);
    ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
    unsigned EltBitWidth = Src.getScalarValueSizeInBits();

    if (SrcEltCnt.isScalable())
      return false;

    // Demand the bits from every vector element without a constant index.
    unsigned NumSrcElts = SrcEltCnt.getFixedValue();
    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
      if (CIdx->getAPIntValue().ult(NumSrcElts))
        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());

    // If BitWidth > EltBitWidth the value is any-extended, so we do not know
    // anything about the extended bits.
    APInt DemandedSrcBits = DemandedBits;
    if (BitWidth > EltBitWidth)
      DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);

    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known = Known2;
    if (BitWidth > EltBitWidth)
      Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return false;
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place.  We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
      }
    }

    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
    // Demand the elt/bit if any of the original elts/bits are demanded.
    if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
      unsigned Scale = BitWidth / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      APInt KnownSrcUndef, KnownSrcZero;
      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                     KnownSrcZero, TLO, Depth + 1))
        return true;

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
      // TODO - bigendian once we have test coverage.
      unsigned Scale = NumSrcEltBits / BitWidth;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * BitWidth;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SrcVT.isVector()) {
        APInt KnownSrcUndef, KnownSrcZero;
        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                       KnownSrcZero, TLO, Depth + 1))
          return true;
      }

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    }

    // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    break;
  }
  case ISD::MUL:
    if (DemandedBits.isPowerOf2()) {
      // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
      // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
      // odd (has LSB set), then the left-shifted low bit of X is the answer.
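      // E.g. (a sketch): demanding only bit 3 of X * 24 works out to
      // (X * 3) << 3, whose bit 3 is X's bit 0 (3 is odd), so (X << 3)
      // already supplies the demanded bit; the fold below builds that shift.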
      unsigned CTZ = DemandedBits.countTrailingZeros();
      ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
      if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
        EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
        SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
        SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
        return TLO.CombineTo(Op, Shl);
      }
    }
    // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
    // X * X is odd iff X is odd.
    // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
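    // (Sanity check of the claim: X*X mod 4 is 0 for even X and 1 for odd X,
    // so bit 1 is always clear and bit 0 equals X[0], which is exactly what
    // the (X & 1) replacement below produces for the low two bits.)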
    if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
      SDValue One = TLO.DAG.getConstant(1, dl, VT);
      SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
      return TLO.CombineTo(Op, And1);
    }
    [[fallthrough]];
  case ISD::ADD:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
    SDNodeFlags Flags = Op.getNode()->getFlags();
    unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        // See if the operation should be performed at a smaller bit width.
        ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
      if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
        // Disable the nsw and nuw flags. We can no longer guarantee that we
        // won't wrap after simplification.
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        Op->setFlags(Flags);
      }
      return true;
    }

    // neg x with only low bit demanded is simply x.
    if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
        isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero())
      return TLO.CombineTo(Op, Op1);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If we have a constant operand, we may be able to turn it into -1 if we
    // do not demand the high bits. This can make the constant smaller to
    // encode, allow more general folding, or match specialized instruction
    // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
    // is probably not useful (and could be detrimental).
    ConstantSDNode *C = isConstOrConstSplat(Op1);
    APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
    if (C && !C->isAllOnes() && !C->isOne() &&
        (C->getAPIntValue() | HighMask).isAllOnes()) {
      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
      // Disable the nsw and nuw flags. We can no longer guarantee that we
      // won't wrap after simplification.
      Flags.setNoSignedWrap(false);
      Flags.setNoUnsignedWrap(false);
      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
      return TLO.CombineTo(Op, NewOp);
    }

    // Match a multiply with a disguised negated-power-of-2 and convert to an
    // equivalent shift-left amount.
    // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
    auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
      if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
        return 0;

      // Don't touch opaque constants. Also, ignore zero and power-of-2
      // multiplies. Those will get folded later.
      ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
      if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
          !MulC->getAPIntValue().isPowerOf2()) {
        APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
        if (UnmaskedC.isNegatedPowerOf2())
          return (-UnmaskedC).logBase2();
      }
      return 0;
    };

    auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
                       unsigned ShlAmt) {
      EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
      SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
      SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
      SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
      return TLO.CombineTo(Op, Res);
    };

    if (isOperationLegalOrCustom(ISD::SHL, VT)) {
      if (Op.getOpcode() == ISD::ADD) {
        // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
        if (unsigned ShAmt = getShiftLeftAmt(Op0))
          return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
        // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
        if (unsigned ShAmt = getShiftLeftAmt(Op1))
          return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
      }
      if (Op.getOpcode() == ISD::SUB) {
        // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
        if (unsigned ShAmt = getShiftLeftAmt(Op1))
          return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
      }
    }

    [[fallthrough]];
  }
  default:
    // We also ask the target about intrinsics (which could be specific to it).
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
      // TODO: Probably okay to remove after audit; here to reduce change size
      // in initial enablement patch for scalable vectors
      if (Op.getValueType().isScalableVector())
        break;
      if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
                                            Known, TLO, Depth))
        return true;
      break;
    }

    // Just use computeKnownBits to compute output bits.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if (!isTargetCanonicalConstantNode(Op) &&
      DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
    // Avoid folding to a constant if any OpaqueConstant is involved.
    const SDNode *N = Op.getNode();
    for (SDNode *Op :
         llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (C->isOpaque())
          return false;
    }
    if (VT.isInteger())
      return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
    if (VT.isFloatingPoint())
      return TLO.CombineTo(
          Op,
          TLO.DAG.getConstantFP(
              APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
  }

  // A multi-use 'all demanded elts' simplify failed to find any knownbits.
  // Try again just for the original demanded elts.
  // Ensure we do this AFTER constant folding above.
  if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
    Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);

  return false;
}
bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
                                                const APInt &DemandedElts,
                                                DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());

  APInt KnownUndef, KnownZero;
  bool Simplified =
      SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }

  return Simplified;
}
/// Given a vector binary operation and known undefined elements for each input
/// operand, compute whether each element of the output is undefined.
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
                                         const APInt &UndefOp0,
                                         const APInt &UndefOp1) {
  EVT VT = BO.getValueType();
  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
         "Vector binop only");

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
  assert(UndefOp0.getBitWidth() == NumElts &&
         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");

  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
                                   const APInt &UndefVals) {
    if (UndefVals[Index])
      return DAG.getUNDEF(EltVT);

    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
      // Try hard to make sure that the getNode() call is not creating temporary
      // nodes. Ignore opaque integers because they do not constant fold.
      SDValue Elt = BV->getOperand(Index);
      auto *C = dyn_cast<ConstantSDNode>(Elt);
      if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
        return Elt;
    }

    return SDValue();
  };

  APInt KnownUndef = APInt::getZero(NumElts);
  for (unsigned i = 0; i != NumElts; ++i) {
    // If both inputs for this element are either constant or undef and match
    // the element type, compute the constant/undef result for this element of
    // the vector.
    // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
    // not handle FP constants. The code within getNode() should be refactored
    // to avoid the danger of creating a bogus temporary node here.
    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
    if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
        KnownUndef.setBit(i);
  }
  return KnownUndef;
}
bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  APInt DemandedElts = OriginalDemandedElts;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");

  KnownUndef = KnownZero = APInt::getZero(NumElts);

  const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
  if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
    return false;

  // TODO: For now we assume we know nothing about scalable vectors.
  if (VT.isScalableVector())
    return false;

  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();

  // Helper for demanding the specified elements and all the bits of both
  // binary operands.
  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
    SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    if (NewOp0 || NewOp1) {
      SDValue NewOp = TLO.DAG.getNode(
          Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
      return TLO.CombineTo(Op, NewOp);
    }
    return false;
  };
  switch (Opcode) {
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    }
    SDValue ScalarSrc = Op.getOperand(0);
    if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      SDValue Src = ScalarSrc.getOperand(0);
      SDValue Idx = ScalarSrc.getOperand(1);
      EVT SrcVT = Src.getValueType();

      ElementCount SrcEltCnt = SrcVT.getVectorElementCount();

      if (SrcEltCnt.isScalable())
        return false;

      unsigned NumSrcElts = SrcEltCnt.getFixedValue();
      if (isNullConstant(Idx)) {
        APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
        APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
        APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
        if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                       TLO, Depth + 1))
          return true;
      }
    }
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // We only handle vectors here.
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
    if (!SrcVT.isVector())
      break;

    // Fast handling of 'identity' bitcasts.
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    if (NumSrcElts == NumElts)
      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
                                        KnownZero, TLO, Depth + 1);

    APInt SrcDemandedElts, SrcZero, SrcUndef;

    // When bitcasting from a 'large element' src vector to a 'small element'
    // vector, we must demand a source element if any DemandedElt maps to it.
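    // E.g. (a sketch): bitcasting v2i64 -> v4i32 gives Scale = 2, and
    // demanding i32 element 3 demands i64 source element 1, since elements
    // 2 and 3 of the result both live inside that wider source element.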
    if ((NumElts % NumSrcElts) == 0) {
      unsigned Scale = NumElts / NumSrcElts;
      SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
      // of the large element.
      // TODO - bigendian once we have test coverage.
      if (IsLE) {
        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
        APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
        for (unsigned i = 0; i != NumElts; ++i)
          if (DemandedElts[i]) {
            unsigned Ofs = (i % Scale) * EltSizeInBits;
            SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
          }

        KnownBits Known;
        if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
                                 TLO, Depth + 1))
          return true;

        // The bitcast has split each wide element into a number of
        // narrow subelements. We have just computed the Known bits
        // for wide elements. See if element splitting results in
        // some subelements being zero. Only for demanded elements!
        for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
          if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
                   .isAllOnes())
            continue;
          for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
            unsigned Elt = Scale * SrcElt + SubElt;
            if (DemandedElts[Elt])
              KnownZero.setBit(Elt);
          }
        }
      }

      // If the src element is zero/undef then all the corresponding output
      // elements will be as well - but only the demanded elements are
      // guaranteed to be correct.
      for (unsigned i = 0; i != NumSrcElts; ++i) {
        if (SrcDemandedElts[i]) {
          if (SrcZero[i])
            KnownZero.setBits(i * Scale, (i + 1) * Scale);
          if (SrcUndef[i])
            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
        }
      }
    }

    // When bitcasting from a 'small element' src vector to a 'large element'
    // vector, we demand all the smaller source elements covered by the larger
    // demanded element of this vector.
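    // E.g. (a sketch): bitcasting v4i32 -> v2i64 gives Scale = 2, and
    // demanding i64 element 1 demands i32 source elements 2 and 3, which
    // together supply that element's 64 bits.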
    if ((NumSrcElts % NumElts) == 0) {
      unsigned Scale = NumSrcElts / NumElts;
      SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If all the src elements covering an output element are zero/undef,
      // then the output element will be as well, assuming it was demanded.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i]) {
          if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
            KnownZero.setBit(i);
          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
            KnownUndef.setBit(i);
        }
      }
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnes()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Op->op_values(),
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
      KnownZero.insertBits(SubZero, i * NumSubElts);
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnes()) {
      bool FoundNewSub = false;
      SmallVector<SDValue, 2> DemandedSubOps;
      for (unsigned i = 0; i != NumSubVecs; ++i) {
        SDValue SubOp = Op.getOperand(i);
        APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
        SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
            SubOp, SubElts, TLO.DAG, Depth + 1);
        DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
        FoundNewSub = NewSubOp ? true : FoundNewSub;
      }
      if (FoundNewSub) {
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
                                   Depth + 1))
      return true;

    // If none of the src operand elements are demanded, replace it with undef.
    if (!DemandedSrcElts && !Src.isUndef())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                                               TLO.DAG.getUNDEF(VT), Sub,
                                               Op.getOperand(2)));

    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;
    KnownUndef.insertBits(SubUndef, Idx);
    KnownZero.insertBits(SubZero, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
          Sub, DemandedSubElts, TLO.DAG, Depth + 1);
      if (NewSrc || NewSub) {
        NewSrc = NewSrc ? NewSrc : Src;
        NewSub = NewSub ? NewSub : Sub;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        NewSub, Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownUndef = SrcUndef.extractBits(NumElts, Idx);
    KnownZero = SrcZero.extractBits(NumElts, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnes()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));

    // For a legal, constant insertion index, if we don't need this insertion
    // then strip it, else remove it from the demanded elts.
    if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
      unsigned Idx = CIdx->getZExtValue();
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);

      APInt DemandedVecElts(DemandedElts);
      DemandedVecElts.clearBit(Idx);
      if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
                                     KnownZero, TLO, Depth + 1))
        return true;

      KnownUndef.setBitVal(Idx, Scl.isUndef());
      KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
      break;
    }

    APInt VecUndef, VecZero;
    if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
                                   Depth + 1))
      return true;
    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
    break;
  }
  case ISD::VSELECT: {
    SDValue Sel = Op.getOperand(0);
    SDValue LHS = Op.getOperand(1);
    SDValue RHS = Op.getOperand(2);

    // Try to transform the select condition based on the current demanded
    // elements.
    APInt UndefSel, UndefZero;
    if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
                                   Depth + 1))
      return true;

    // See if we can simplify either vselect operand.
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    KnownUndef = UndefLHS & UndefRHS;
    KnownZero = ZeroLHS & ZeroRHS;

    // If we know that the selected element is always zero, we don't need the
    // select value element.
    APInt DemandedSel = DemandedElts & ~KnownZero;
    if (DemandedSel != DemandedElts)
      if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
                                     Depth + 1))
        return true;

    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }
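    // Illustrative example (not from the original source): with NumElts = 4,
    // mask <0, 5, 2, 7> and DemandedElts = 0b1010, lanes 1 and 3 read RHS
    // elements 1 and 3 (M - NumElts), so DemandedLHS = 0b0000 and
    // DemandedRHS = 0b1010.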
    // See if we can simplify either shuffle operand.
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;

    // Simplify mask using undef elements from LHS/RHS.
    bool Updated = false;
    bool IdentityLHS = true, IdentityRHS = true;
    SmallVector<int, 32> NewMask(ShuffleMask);
    for (unsigned i = 0; i != NumElts; ++i) {
      int &M = NewMask[i];
      if (M < 0)
        continue;
      if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
          (M >= (int)NumElts && UndefRHS[M - NumElts])) {
        Updated = true;
        M = -1;
      }
      IdentityLHS &= (M < 0) || (M == (int)i);
      IdentityRHS &= (M < 0) || ((M - NumElts) == i);
    }

    // Update legal shuffle masks based on demanded elements, but only if doing
    // so won't reduce the mask to an identity shuffle, which could cause
    // premature removal of the shuffle mask.
    if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
      SDValue LegalShuffle =
          buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
      if (LegalShuffle)
        return TLO.CombineTo(Op, LegalShuffle);
    }

    // Propagate undef/zero elements from LHS/RHS.
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0) {
        KnownUndef.setBit(i);
      } else if (M < (int)NumElts) {
        if (UndefLHS[M])
          KnownUndef.setBit(i);
        if (ZeroLHS[M])
          KnownZero.setBit(i);
      } else {
        if (UndefRHS[M - NumElts])
          KnownUndef.setBit(i);
        if (ZeroRHS[M - NumElts])
          KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    APInt SrcUndef, SrcZero;
    SDValue Src = Op.getOperand(0);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownZero = SrcZero.zextOrTrunc(NumElts);
    KnownUndef = SrcUndef.zextOrTrunc(NumElts);

    if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
        Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
        DemandedSrcElts == 1) {
      // aext - if we just need the bottom element then we can bitcast.
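      // Illustrative example (not from the original source): for
      // (v2i64 any_extend_vector_inreg (v4i32 X)) with only element 0
      // demanded, just the low 32 bits of X are needed, and a little-endian
      // bitcast of X to v2i64 leaves them in place.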
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
    }

    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();

      // zext - if we just need the bottom element then we can mask:
      // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
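      // Illustrative example (not from the original source): for a v4i32
      // source, (zext_inreg (and X, <C0,C1,C2,C3>)) with one demanded element
      // folds to bitcast (and X, <C0,0,0,0>) - the constant mask keeps only
      // the bottom source element.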
      if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
          Op->isOnlyUserOf(Src.getNode()) &&
          Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
        SDLoc DL(Op);
        EVT SrcVT = Src.getValueType();
        EVT SrcSVT = SrcVT.getScalarType();
        SmallVector<SDValue> MaskElts;
        MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
        MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
        SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
        if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
                ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
          Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
          return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
        }
      }
    }
    break;
  }
  // TODO: There are more binop opcodes that could be handled here - MIN,
  // MAX, saturated math, etc.
  case ISD::ADD: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
      APInt UndefLHS, ZeroLHS;
      if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                     Depth + 1, /*AssumeSingleUse*/ true))
        return true;
    }
    [[fallthrough]];
  }
  case ISD::OR:
  case ISD::XOR:
  case ISD::SUB:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    KnownZero = ZeroLHS & ZeroRHS;
    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnes())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    KnownZero = ZeroLHS;
    KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnes())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::MUL:
  case ISD::MULHU:
  case ISD::MULHS:
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    // If we know that a demanded element was zero in Op1 we don't need to
    // demand it in Op0 - it's guaranteed to be zero.
    APInt DemandedElts0 = DemandedElts & ~SrcZero;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;

    KnownUndef &= DemandedElts0;
    KnownZero &= DemandedElts0;

    // If every element pair has a zero/undef then just fold to zero.
    // fold (and x, undef) -> 0 / (and x, 0) -> 0
    // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
    if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));

    // If either side has a zero element, then the result element is zero, even
    // if the other is an UNDEF.
    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
    // and then handle 'and' nodes with the rest of the binop opcodes.
    KnownZero |= SrcZero;
    KnownUndef &= SrcUndef;
    KnownUndef &= ~KnownZero;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnes())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::TRUNCATE:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
                                   KnownZero, TLO, Depth + 1))
      return true;

    if (Op.getOpcode() == ISD::ZERO_EXTEND) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();
    }
    break;
  default: {
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
      if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
                                                  KnownZero, TLO, Depth))
        return true;
    } else {
      KnownBits Known;
      APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
      if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
                               TLO, Depth, AssumeSingleUse))
        return true;
    }
    break;
  }
  }
  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");

  // Constant fold all undef cases.
  // TODO: Handle zero cases as well.
  if (DemandedElts.isSubsetOf(KnownUndef))
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

  return false;
}

/// Determine which of the bits specified in Mask are known to be either zero
/// or one and return them in Known.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                   KnownBits &Known,
                                                   const APInt &DemandedElts,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");
  Known.resetAll();
}

void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}

void TargetLowering::computeKnownBitsForFrameIndex(
    const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
  // The low bits are known zero if the pointer is aligned.
  Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
}

Align TargetLowering::computeKnownAlignForTargetInstr(
    GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  return Align(1);
}

/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                         const APInt &,
                                                         const SelectionDAG &,
                                                         unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use ComputeNumSignBits if you don't know whether Op"
         " is a target node!");
  return 1;
}

unsigned TargetLowering::computeNumSignBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
    const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}

bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
         " is a target node!");
  return false;
}

bool TargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedBits if you don't know whether Op"
         " is a target node!");
  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
  return false;
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  assert(
      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
      "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
      " is a target node!");
  return SDValue();
}

SDValue
TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
                                        SDValue N1, MutableArrayRef<int> Mask,
                                        SelectionDAG &DAG) const {
  bool LegalMask = isShuffleMaskLegal(Mask, VT);
  if (!LegalMask) {
    std::swap(N0, N1);
    ShuffleVectorSDNode::commuteMask(Mask);
    LegalMask = isShuffleMaskLegal(Mask, VT);
  }

  if (!LegalMask)
    return SDValue();

  return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
}

const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}

bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, unsigned Depth) const {
  assert(
      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
      "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
      " is a target node!");
  return false;
}

bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use canCreateUndefOrPoison if you don't know whether Op"
         " is a target node!");
  // Be conservative and return true.
  return true;
}

bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  bool SNaN,
                                                  unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isKnownNeverNaN if you don't know whether Op"
         " is a target node!");
  return false;
}

bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
                                               const APInt &DemandedElts,
                                               APInt &UndefElts,
                                               const SelectionDAG &DAG,
                                               unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isSplatValue if you don't know whether Op"
         " is a target node!");
  return false;
}

// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
bool TargetLowering::isConstTrueVal(SDValue N) const {
  if (!N)
    return false;

  unsigned EltWidth;
  APInt CVal;
  if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
                                               /*AllowTruncation=*/true)) {
    CVal = CN->getAPIntValue();
    EltWidth = N.getValueType().getScalarSizeInBits();
  } else
    return false;

  // If this is a truncating splat, truncate the splat value.
  // Otherwise, we may fail to match the expected values below.
  if (EltWidth < CVal.getBitWidth())
    CVal = CVal.trunc(EltWidth);

  switch (getBooleanContents(N.getValueType())) {
  case UndefinedBooleanContent:
    return CVal[0];
  case ZeroOrOneBooleanContent:
    return CVal.isOne();
  case ZeroOrNegativeOneBooleanContent:
    return CVal.isAllOnes();
  }

  llvm_unreachable("Invalid boolean contents");
}

bool TargetLowering::isConstFalseVal(SDValue N) const {
  if (!N)
    return false;

  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) {
    const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
    if (!BV)
      return false;

    // Only interested in constant splats; we don't care about undef elements
    // when identifying boolean constants, and getConstantSplatNode returns
    // null if all ops are undef.
    CN = BV->getConstantSplatNode();
    if (!CN)
      return false;
  }

  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
    return !CN->getAPIntValue()[0];

  return CN->isZero();
}

bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
                                       bool SExt) const {
  if (VT == MVT::i1)
    return N->isOne();

  TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
  switch (Cnt) {
  case TargetLowering::ZeroOrOneBooleanContent:
    // An extended value of 1 is always true, unless its original type is i1,
    // in which case it will be sign extended to -1.
    return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
  case TargetLowering::UndefinedBooleanContent:
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    return N->isAllOnes() && SExt;
  }
  llvm_unreachable("Unexpected enumeration.");
}

/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(N1) &&
      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
    if (DAG.MaskedValueIsZero(N0, UpperBits))
      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  // destination types. That may inhibit optimizations, but it also
  // allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(OpVT) && N0.hasOneUse()) {
    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
                                     AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
      return DAG.getSetCC(DL, VT, Trunc, Zero,
                          Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);

    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    auto *YConst = dyn_cast<ConstantSDNode>(Y);
    if (YConst && YConst->isZero())
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}

/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern that is
/// best at the IR level may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to %x dstcond 0.
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
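/// For example (illustrative, not from the original source), for i16 %x with
/// KeptBits = 8:
///   (add %x, 128) ult 256  -->  ((%x << 8) a>> 8) eq %x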
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be: add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into: ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
  return T2;
}

// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
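// e.g. (X & (0x80 l>> Y)) != 0 --> ((X << Y) & 0x80) != 0, which can avoid
// materializing the shifted constant (illustrative example, not from the
// original source).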
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) &&
         isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Look for '(C l>>/<< Y)'.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be a one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}

/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(0);
  SDValue Y = N0.getOperand(1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);

  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);

  // The shift below would not be valid for boolean (i1) operands, where
  // Y << 1 is always 0.
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
                                 !DCI.isBeforeLegalize());
  SDValue One = DAG.getConstant(1, DL, ShiftVT);
  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(YShl1.getNode());
  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}

static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() >
          Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
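  // Each extra unit of C1 strips one more set bit, e.g. (illustrative example,
  // not from the original source):
  // (ctpop x) u< 3 -> ((x & (x - 1)) & ((x & (x - 1)) - 1)) == 0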
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a legal vector op.
    if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
      return SDValue();

    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // Expand a power-of-2 comparison based on ctpop:
  // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
  // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is legal.
    if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
    SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
    SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
    SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
    unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
    return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
  }

  return SDValue();
}

static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
                                   ISD::CondCode Cond, const SDLoc &dl,
                                   SelectionDAG &DAG) {
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
  if (!C1 || !(C1->isZero() || C1->isAllOnes()))
    return SDValue();

  auto getRotateSource = [](SDValue X) {
    if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
      return X.getOperand(0);
    return SDValue();
  };

  // Peek through a rotated value compared against 0 or -1:
  // (rot X, Y) == 0/-1 --> X == 0/-1
  // (rot X, Y) != 0/-1 --> X != 0/-1
  if (SDValue R = getRotateSource(N0))
    return DAG.getSetCC(dl, VT, R, N1, Cond);

  // Peek through an 'or' of a rotated value compared against 0:
  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
  //
  // TODO: Add the 'and' with -1 sibling.
  // TODO: Recurse through a series of 'or' ops to find the rotate.
  EVT OpVT = N0.getValueType();
  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
    if (SDValue R = getRotateSource(N0.getOperand(0))) {
      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
    }
    if (SDValue R = getRotateSource(N0.getOperand(1))) {
      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
    }
  }

  return SDValue();
}

static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
                                        ISD::CondCode Cond, const SDLoc &dl,
                                        SelectionDAG &DAG) {
  // If we are testing for all-bits-clear, we might be able to do that with
  // less shifting since bit-order does not matter.
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
  if (!C1 || !C1->isZero())
    return SDValue();

  if (!N0.hasOneUse() ||
      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
    return SDValue();

  unsigned BitWidth = N0.getScalarValueSizeInBits();
  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
  if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
    return SDValue();

  // Canonicalize fshr as fshl to reduce pattern-matching.
  unsigned ShAmt = ShAmtC->getZExtValue();
  if (N0.getOpcode() == ISD::FSHR)
    ShAmt = BitWidth - ShAmt;
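  // e.g. with BitWidth = 32, fshr by 8 is equivalent to fshl by 24, so only
  // the fshl form needs to be matched below (illustrative note).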

  // Match an 'or' with a specific operand 'Other' in either commuted variant.
  SDValue X, Y;
  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
      return false;
    if (Or.getOperand(0) == Other) {
      X = Or.getOperand(0);
      Y = Or.getOperand(1);
      return true;
    }
    if (Or.getOperand(1) == Other) {
      X = Or.getOperand(1);
      Y = Or.getOperand(0);
      return true;
    }
    return false;
  };

  EVT OpVT = N0.getValueType();
  EVT ShAmtVT = N0.getOperand(2).getValueType();
  SDValue F0 = N0.getOperand(0);
  SDValue F1 = N0.getOperand(1);
  if (matchOr(F0, F1)) {
    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }
  if (matchOr(F1, F0)) {
    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
    SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
  }

  return SDValue();
}

/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                      ISD::CondCode Cond, bool foldBooleans,
                                      DAGCombinerInfo &DCI,
                                      const SDLoc &dl) const {
  SelectionDAG &DAG = DCI.DAG;
  const DataLayout &Layout = DAG.getDataLayout();
  EVT OpVT = N0.getValueType();
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // Constant fold or commute setcc.
  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
    return Fold;

  bool N0ConstOrSplat =
      isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
  bool N1ConstOrSplat =
      isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);

  // Ensure that the constant occurs on the RHS and fold constant comparisons.
  // TODO: Handle non-splat vector constants. All undef causes trouble.
  // FIXME: We can't yet fold constant scalable vector splats, so avoid an
  // infinite loop here when we encounter one.
  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
  if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
      (DCI.isBeforeLegalizeOps() ||
       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);

  // If we have a subtract with the same 2 non-constant operands as this setcc
  // -- but in reverse order -- then try to commute the operands of this setcc
  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
  // instruction on some targets.
  if (!N0ConstOrSplat && !N1ConstOrSplat &&
      (DCI.isBeforeLegalizeOps() ||
       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
      DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
      !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);

  if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
    return V;

  if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
    return V;

  if (auto *N1C = isConstOrConstSplat(N1)) {
    const APInt &C1 = N1C->getAPIntValue();

    // Optimize some CTPOP cases.
    if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
      return V;

    // For equality to 0 of a no-wrap multiply, decompose and test each op:
    // X * Y == 0 --> (X == 0) || (Y == 0)
    // X * Y != 0 --> (X != 0) && (Y != 0)
    // TODO: This bails out if minsize is set, but if the target doesn't have a
    //       single instruction multiply for this type, it would likely be
    //       smaller to decompose.
    if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
        (N0->getFlags().hasNoUnsignedWrap() ||
         N0->getFlags().hasNoSignedWrap()) &&
        !Attr.hasFnAttr(Attribute::MinSize)) {
      SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
      SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
      unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
      return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
    }

    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
    // equality comparison, then we're just comparing whether X itself is
    // zero.
    if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
        N0.getOperand(0).getOpcode() == ISD::CTLZ &&
        isPowerOf2_32(N0.getScalarValueSizeInBits())) {
      if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
        if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
            ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
          if ((C1 == 0) == (Cond == ISD::SETEQ)) {
            // (srl (ctlz x), 5) == 0 -> X != 0
            // (srl (ctlz x), 5) != 1 -> X != 0
            Cond = ISD::SETNE;
          } else {
            // (srl (ctlz x), 5) != 0 -> X == 0
            // (srl (ctlz x), 5) == 1 -> X == 0
            Cond = ISD::SETEQ;
          }
          SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
          return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
                              Cond);
        }
      }
    }
  }

  // FIXME: Support vectors.
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const APInt &C1 = N1C->getAPIntValue();

    // (zext x) == C --> x == (trunc C)
    // (sext x) == C --> x == (trunc C)
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        DCI.isBeforeLegalize() && N0->hasOneUse()) {
      unsigned MinBits = N0.getValueSizeInBits();
      SDValue PreExt;
      bool Signed = false;
      if (N0->getOpcode() == ISD::ZERO_EXTEND) {
        // ZExt
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
      } else if (N0->getOpcode() == ISD::AND) {
        // DAGCombine turns costly ZExts into ANDs
        if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
          if ((C->getAPIntValue() + 1).isPowerOf2()) {
            MinBits = C->getAPIntValue().countTrailingOnes();
            PreExt = N0->getOperand(0);
          }
      } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
        // SExt
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
        Signed = true;
      } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
        // ZEXTLOAD / SEXTLOAD
        if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
          Signed = true;
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        }
      }

      // Figure out how many bits we need to preserve this constant.
      unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();

      // Make sure we're not losing bits from the constant.
      if (MinBits > 0 &&
          MinBits < C1.getBitWidth() &&
          MinBits >= ReqdBits) {
        EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
        if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
          // Will get folded away.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
          if (MinBits == 1 && C1 == 1)
            // Invert the condition.
            return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
          SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
          return DAG.getSetCC(dl, VT, Trunc, C, Cond);
        }

        // If truncating the setcc operands is not desirable, we can still
        // simplify the expression in some cases:
        // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
        // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, setne)  -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, seteq)  -> setcc (x, y, cc)
        // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
        // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
        SDValue TopSetCC = N0->getOperand(0);
        unsigned N0Opc = N0->getOpcode();
        bool SExt = (N0Opc == ISD::SIGN_EXTEND);
        if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
            TopSetCC.getOpcode() == ISD::SETCC &&
            (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
            (isConstFalseVal(N1) ||
             isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
          bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
                         (!N1C->isZero() && Cond == ISD::SETNE);
          if (!Inverse)
            return TopSetCC;

          ISD::CondCode InvCond = ISD::getSetCCInverse(
              cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
              TopSetCC.getOperand(0).getValueType());
          return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
                              TopSetCC.getOperand(1),
                              InvCond);
        }
      }
    }

    // If the LHS is '(and load, const)', the RHS is 0, the test is for
    // equality or unsigned, and all 1 bits of the const are in the same
    // partial word, see if we can shorten the load.
    if (DCI.isBeforeLegalize() &&
        !ISD::isSignedIntSetCC(Cond) &&
        N0.getOpcode() == ISD::AND && C1 == 0 &&
        N0.getNode()->hasOneUse() &&
        isa<LoadSDNode>(N0.getOperand(0)) &&
        N0.getOperand(0).getNode()->hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
      APInt bestMask;
      unsigned bestWidth = 0, bestOffset = 0;
      if (Lod->isSimple() && Lod->isUnindexed()) {
        unsigned origWidth = N0.getValueSizeInBits();
        unsigned maskWidth = origWidth;
        // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
        // 8 bits, but have to be careful...
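        // Illustrative example (not from the original source): on a
        // little-endian target, ((i32 load) & 0xFF00) == 0 can be checked with
        // a 16-bit load of the low halfword: ((i16 load) & 0xFF00) == 0.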
        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
          origWidth = Lod->getMemoryVT().getSizeInBits();
        const APInt &Mask = N0.getConstantOperandAPInt(1);
        for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
          for (unsigned offset = 0; offset < origWidth / width; offset++) {
            if (Mask.isSubsetOf(newMask)) {
              if (Layout.isLittleEndian())
                bestOffset = (uint64_t)offset * (width / 8);
              else
                bestOffset = (origWidth / width - offset - 1) * (width / 8);
              bestMask = Mask.lshr(offset * (width / 8) * 8);
              bestWidth = width;
              break;
            }
            newMask <<= width;
          }
        }
      }
      if (bestWidth) {
        EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
        if (newVT.isRound() &&
            shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
          SDValue Ptr = Lod->getBasePtr();
          if (bestOffset != 0)
            Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset),
                                           dl);
          SDValue NewLoad =
              DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
                          Lod->getPointerInfo().getWithOffset(bestOffset),
                          Lod->getOriginalAlign());
          return DAG.getSetCC(dl, VT,
                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
                                          DAG.getConstant(
                                              bestMask.trunc(bestWidth), dl,
                                              newVT)),
                              DAG.getConstant(0LL, dl, newVT), Cond);
        }
      }
    }

    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
      unsigned InSize = N0.getOperand(0).getValueSizeInBits();

      // If the comparison constant has bits in the upper part, the
      // zero-extended value could never match.
      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
                                              C1.getBitWidth() - InSize))) {
        switch (Cond) {
        case ISD::SETUGT:
        case ISD::SETUGE:
        case ISD::SETEQ:
          return DAG.getConstant(0, dl, VT);
        case ISD::SETULT:
        case ISD::SETULE:
        case ISD::SETNE:
          return DAG.getConstant(1, dl, VT);
        case ISD::SETGT:
        case ISD::SETGE:
          // True if the sign bit of C1 is set.
          return DAG.getConstant(C1.isNegative(), dl, VT);
        case ISD::SETLT:
        case ISD::SETLE:
          // True if the sign bit of C1 isn't set.
          return DAG.getConstant(C1.isNonNegative(), dl, VT);
        default:
          break;
        }
      }

      // Otherwise, we can perform the comparison with the low bits.
      switch (Cond) {
      case ISD::SETEQ:
      case ISD::SETNE:
      case ISD::SETUGT:
      case ISD::SETUGE:
      case ISD::SETULT:
      case ISD::SETULE: {
        EVT newVT = N0.getOperand(0).getValueType();
        if (DCI.isBeforeLegalizeOps() ||
            (isOperationLegal(ISD::SETCC, newVT) &&
             isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
          EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
          SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);

          SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
                                          NewConst, Cond);
          return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
        }
        break;
      }
      default:
        break; // TODO: be more careful with signed comparisons
      }
    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
               !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
                                      OpVT)) {
      EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
      EVT ExtDstTy = N0.getValueType();
      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();

      // If the constant doesn't fit into the number of bits for the source of
      // the sign extension, it is impossible for both sides to be equal.
      if (C1.getMinSignedBits() > ExtSrcTyBits)
        return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);

      assert(ExtDstTy == N0.getOperand(0).getValueType() &&
             ExtDstTy != ExtSrcTy && "Unexpected types!");
      APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
      SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
                                   DAG.getConstant(Imm, dl, ExtDstTy));
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(ZextOp.getNode());
      // Otherwise, make this a use of a zext.
      return DAG.getSetCC(dl, VT, ZextOp,
                          DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
    } else if ((N1C->isZero() || N1C->isOne()) &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
      if (N0.getOpcode() == ISD::SETCC &&
          isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
          (N0.getValueType() == MVT::i1 ||
           getBooleanContents(N0.getOperand(0).getValueType()) ==
               ZeroOrOneBooleanContent)) {
        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
        if (TrueWhenTrue)
          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);

        // Invert the condition.
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
        if (DCI.isBeforeLegalizeOps() ||
            isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
      }

      if ((N0.getOpcode() == ISD::XOR ||
           (N0.getOpcode() == ISD::AND &&
            N0.getOperand(0).getOpcode() == ISD::XOR &&
            N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
          isOneConstant(N0.getOperand(1))) {
        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
        // can only do this if the top bits are known zero.
        unsigned BitWidth = N0.getValueSizeInBits();
        if (DAG.MaskedValueIsZero(N0,
                                  APInt::getHighBitsSet(BitWidth,
                                                        BitWidth - 1))) {
          // Okay, get the un-inverted input value.
          SDValue Val;
          if (N0.getOpcode() == ISD::XOR) {
            Val = N0.getOperand(0);
          } else {
            assert(N0.getOpcode() == ISD::AND &&
                   N0.getOperand(0).getOpcode() == ISD::XOR);
            // ((X^1)&1)^1 -> X & 1
            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
                              N0.getOperand(0).getOperand(0),
                              N0.getOperand(1));
          }

          return DAG.getSetCC(dl, VT, Val, N1,
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
      } else if (N1C->isOne()) {
        SDValue Op0 = N0;
        if (Op0.getOpcode() == ISD::TRUNCATE)
          Op0 = Op0.getOperand(0);

        if ((Op0.getOpcode() == ISD::XOR) &&
            Op0.getOperand(0).getOpcode() == ISD::SETCC &&
            Op0.getOperand(1).getOpcode() == ISD::SETCC) {
          SDValue XorLHS = Op0.getOperand(0);
          SDValue XorRHS = Op0.getOperand(1);
          // Ensure that the input setccs return an i1 type or 0/1 value.
          if (Op0.getValueType() == MVT::i1 ||
              (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
                   ZeroOrOneBooleanContent &&
               getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
                   ZeroOrOneBooleanContent)) {
            // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
            Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
            return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
          }
        }
        if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
          // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
          if (Op0.getValueType().bitsGT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::TRUNCATE, dl, VT,
                                          Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));
          else if (Op0.getValueType().bitsLT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                          Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));

          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
        if (Op0.getOpcode() == ISD::AssertZext &&
            cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
      }
    }
    // Given:
    //   icmp eq/ne (urem %x, %y), 0
    // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
    //   icmp eq/ne %x, 0
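    // A value with at most one set bit is either zero or a power of two, and
    // a divisor with two or more set bits can never divide a power of two
    // evenly, so the remainder is zero exactly when %x itself is zero.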
    if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
        (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
      KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
      if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
        return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
    }

    // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
    // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
        N1C && N1C->isAllOnes()) {
      return DAG.getSetCC(dl, VT, N0.getOperand(0),
                          DAG.getConstant(0, dl, OpVT),
                          Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
    }

    if (SDValue V =
            optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
      return V;
  }
  // These simplifications apply to splat vectors as well.
  // TODO: Handle more splat vector cases.
  if (auto *N1C = isConstOrConstSplat(N1)) {
    const APInt &C1 = N1C->getAPIntValue();

    APInt MinVal, MaxVal;
    unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
    if (ISD::isSignedIntSetCC(Cond)) {
      MinVal = APInt::getSignedMinValue(OperandBitSize);
      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
    } else {
      MinVal = APInt::getMinValue(OperandBitSize);
      MaxVal = APInt::getMaxValue(OperandBitSize);
    }

    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
    if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
      // X >= MIN --> true
      if (C1 == MinVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      if (!VT.isVector()) { // TODO: Support this for vectors.
        // X >= C0 --> X > (C0 - 1)
        APInt C = C1 - 1;
        ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }
    if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
      // X <= MAX --> true
      if (C1 == MaxVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      // X <= C0 --> X < (C0 + 1)
      if (!VT.isVector()) { // TODO: Support this for vectors.
        APInt C = C1 + 1;
        ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }

    if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
      if (C1 == MinVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setlt X, Max --> setne X, Max
        if (C1 == MaxVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setult X, 1, turn it into seteq X, 0
        if (C1 == MinVal + 1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MinVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }

    if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
      if (C1 == MaxVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setgt X, Min --> setne X, Min
        if (C1 == MinVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setugt X, Max-1, turn it into seteq X, Max
        if (C1 == MaxVal - 1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MaxVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }
    if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
      // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
      if (C1.isZero())
        if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
                VT, N0, N1, Cond, DCI, dl))
          return CC;

      // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
      // For example, when high 32-bits of i64 X are known clear:
      // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
      // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
      bool CmpZero = N1C->getAPIntValue().isZero();
      bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
      if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
        // Match or(lo,shl(hi,bw/2)) pattern.
        auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
          unsigned EltBits = V.getScalarValueSizeInBits();
          if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
            return false;
          SDValue LHS = V.getOperand(0);
          SDValue RHS = V.getOperand(1);
          APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
          // The unshifted element must have zero upper bits.
          if (RHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(RHS.getOperand(1)) &&
              RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(LHS, HiBits)) {
            Lo = LHS;
            Hi = RHS.getOperand(0);
            return true;
          }
          if (LHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(LHS.getOperand(1)) &&
              LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(RHS, HiBits)) {
            Lo = RHS;
            Hi = LHS.getOperand(0);
            return true;
          }
          return false;
        };
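        // Rebuild the compare in the full-width type: with Hi' = Hi masked to
        // the low half, the all-zero test becomes (Lo | Hi') == 0 and the
        // all-ones test becomes (Lo & Hi') == LoMask.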
        auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
          unsigned EltBits = N0.getScalarValueSizeInBits();
          unsigned HalfBits = EltBits / 2;
          APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
          SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
          SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
          SDValue NewN0 =
              DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
          SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
          return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
        };

        SDValue Lo, Hi;
        if (IsConcat(N0, Lo, Hi))
          return MergeConcat(Lo, Hi);

        if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
          SDValue Lo0, Lo1, Hi0, Hi1;
          if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
              IsConcat(N0.getOperand(1), Lo1, Hi1)) {
            return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
                               DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
          }
        }
      }
    }
    // If we have "setcc X, C0", check to see if we can shrink the immediate
    // by changing cc.
    // TODO: Support this for vectors after legalize ops.
    if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
      // SETUGT X, SINTMAX -> SETLT X, 0
      // SETUGE X, SINTMIN -> SETLT X, 0
      if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
          (Cond == ISD::SETUGE && C1.isMinSignedValue()))
        return DAG.getSetCC(dl, VT, N0,
                            DAG.getConstant(0, dl, N1.getValueType()),
                            ISD::SETLT);

      // SETULT X, SINTMIN -> SETGT X, -1
      // SETULE X, SINTMAX -> SETGT X, -1
      if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
          (Cond == ISD::SETULE && C1.isMaxSignedValue()))
        return DAG.getSetCC(dl, VT, N0,
                            DAG.getAllOnesConstant(dl, N1.getValueType()),
                            ISD::SETGT);
    }
  }
  // Back to non-vector simplifications.
  // TODO: Can we do these for vector splats?
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    const APInt &C1 = N1C->getAPIntValue();
    EVT ShValTy = N0.getValueType();

    // Fold bit comparisons when we can. This will result in an
    // incorrect value when boolean false is negative one, unless
    // the bitsize is 1 in which case the false value is the same
    // in practice regardless of the representation.
    if ((VT.getSizeInBits() == 1 ||
         getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
        (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
        N0.getOpcode() == ISD::AND) {
      if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        EVT ShiftTy =
            getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
        if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
          // Perform the xform if the AND RHS is a single bit.
          unsigned ShCt = AndRHS->getAPIntValue().logBase2();
          if (AndRHS->getAPIntValue().isPowerOf2() &&
              !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
                               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                                           DAG.getConstant(ShCt, dl, ShiftTy)));
          }
        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
          // (X & 8) == 8  --> (X & 8) >> 3
          // Perform the xform if C1 is a single bit.
          unsigned ShCt = C1.logBase2();
          if (C1.isPowerOf2() &&
              !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
                               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                                           DAG.getConstant(ShCt, dl, ShiftTy)));
          }
        }
      }
    }
    if (C1.getMinSignedBits() <= 64 &&
        !isLegalICmpImmediate(C1.getSExtValue())) {
      EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
      // (X & -256) == 256 -> (X >> 8) == 1
      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
          N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
        if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          const APInt &AndRHSC = AndRHS->getAPIntValue();
          if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
            unsigned ShiftBits = AndRHSC.countTrailingZeros();
            if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
              SDValue Shift =
                  DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
                              DAG.getConstant(ShiftBits, dl, ShiftTy));
              SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
              return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
            }
          }
        }
      } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
                 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
        bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
        // X <  0x100000000 -> (X >> 32) <  1
        // X >= 0x100000000 -> (X >> 32) >= 1
        // X <= 0x0ffffffff -> (X >> 32) <  1
        // X >  0x0ffffffff -> (X >> 32) >= 1
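        // For ULE/UGT the bound has the form 2^k - 1; adding one first gives
        // the 2^k form whose trailing-zero count is the shift amount.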
        unsigned ShiftBits;
        APInt NewC = C1;
        ISD::CondCode NewCond = Cond;
        if (AdjOne) {
          ShiftBits = C1.countTrailingOnes();
          NewC = NewC + 1;
          NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
        } else {
          ShiftBits = C1.countTrailingZeros();
        }
        NewC.lshrInPlace(ShiftBits);
        if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
            isLegalICmpImmediate(NewC.getSExtValue()) &&
            !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
          SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                                      DAG.getConstant(ShiftBits, dl, ShiftTy));
          SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
          return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
        }
      }
    }
  }
  if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
    auto *CFP = cast<ConstantFPSDNode>(N1);
    assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");

    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
    // constant if knowing that the operand is non-nan is enough.  We prefer to
    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
    // materialize 0.0.
    if (Cond == ISD::SETO || Cond == ISD::SETUO)
      return DAG.getSetCC(dl, VT, N0, N0, Cond);

    // setcc (fneg x), C -> setcc swap(pred) x, -C
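    // Negating both operands reverses the ordering, hence the swapped
    // predicate: -X < C holds exactly when X > -C.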
    if (N0.getOpcode() == ISD::FNEG) {
      ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
        SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
        return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
      }
    }

    // If the condition is not legal, see if we can find an equivalent one
    // which is legal.
    if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
      // If the comparison was an awkward floating-point == or != and one of
      // the comparison operands is infinity or negative infinity, convert the
      // condition to a less-awkward <= or >=.
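      // For example, X ==(oeq) +Inf can only hold when X is at least +Inf,
      // so it is equivalent to X >=(oge) +Inf.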
      if (CFP->getValueAPF().isInfinity()) {
        bool IsNegInf = CFP->getValueAPF().isNegative();
        ISD::CondCode NewCond = ISD::SETCC_INVALID;
        switch (Cond) {
        case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
        case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
        case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
        case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
        default: break;
        }
        if (NewCond != ISD::SETCC_INVALID &&
            isCondCodeLegal(NewCond, N0.getSimpleValueType()))
          return DAG.getSetCC(dl, VT, N0, N1, NewCond);
      }
    }
  }
  if (N0 == N1) {
    // The sext(setcc()) => setcc() optimization relies on the appropriate
    // constant being emitted.
    assert(!N0.getValueType().isInteger() &&
           "Integer types should be handled by FoldSetCC");

    bool EqTrue = ISD::isTrueWhenEqual(Cond);
    unsigned UOF = ISD::getUnorderedFlavor(Cond);
    if (UOF == 2) // FP operators that are undefined on NaNs.
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
    if (UOF == unsigned(EqTrue))
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
    // if it is not already.
    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
    if (NewCond != Cond &&
        (DCI.isBeforeLegalizeOps() ||
         isCondCodeLegal(NewCond, N0.getSimpleValueType())))
      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
  }
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
      N0.getValueType().isInteger()) {
    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
        N0.getOpcode() == ISD::XOR) {
      // Simplify (X+Y) == (X+Z) -->  Y == Z
      if (N0.getOpcode() == N1.getOpcode()) {
        if (N0.getOperand(0) == N1.getOperand(0))
          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
        if (N0.getOperand(1) == N1.getOperand(1))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
        if (isCommutativeBinOp(N0.getOpcode())) {
          // If X op Y == Y op X, try other combinations.
          if (N0.getOperand(0) == N1.getOperand(1))
            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
                                Cond);
          if (N0.getOperand(1) == N1.getOperand(0))
            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
                                Cond);
        }
      }

      // If RHS is a legal immediate value for a compare instruction, we need
      // to be careful about increasing register pressure needlessly.
      bool LegalRHSImm = false;

      if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
        if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          // Turn (X+C1) == C2 --> X == C2-C1
          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
            return DAG.getSetCC(
                dl, VT, N0.getOperand(0),
                DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
                                dl, N0.getValueType()),
                Cond);

          // Turn (X^C1) == C2 --> X == C1^C2
          if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
            return DAG.getSetCC(
                dl, VT, N0.getOperand(0),
                DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
                                dl, N0.getValueType()),
                Cond);
        }
        // Turn (C1-X) == C2 --> X == C1-C2
        if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
            return DAG.getSetCC(
                dl, VT, N0.getOperand(1),
                DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
                                dl, N0.getValueType()),
                Cond);

        // Could RHSC fold directly into a compare?
        if (RHSC->getValueType(0).getSizeInBits() <= 64)
          LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
      }

      // (X+Y) == X --> Y == 0 and similar folds.
      // Don't do this if X is an immediate that can fold into a cmp
      // instruction and X+Y has other uses. It could be an induction variable
      // chain, and the transform would increase register pressure.
      if (!LegalRHSImm || N0.hasOneUse())
        if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
          return V;
    }

    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
        N1.getOpcode() == ISD::XOR)
      if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
        return V;

    if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
      return V;
  }
  // Fold remainder of division by a constant.
  if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
      N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
    // When division is cheap or optimizing for minimum size,
    // fall through to DIVREM creation by skipping this fold.
    if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
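      // Both helpers avoid emitting a division: they rewrite the remainder
      // check as a multiply by the divisor's multiplicative inverse plus an
      // unsigned comparison against a small bound (the zero-remainder test
      // from Hacker's Delight), with a rotate to handle even divisors.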
      if (N0.getOpcode() == ISD::UREM) {
        if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
          return Folded;
      } else if (N0.getOpcode() == ISD::SREM) {
        if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
          return Folded;
      }
    }
  }

  // Fold away ALL boolean setcc's.
  if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
    SDValue Temp;
    switch (Cond) {
    default: llvm_unreachable("Unknown integer setcc!");
    case ISD::SETEQ:  // X == Y  -> ~(X^Y)
      Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
      N0 = DAG.getNOT(dl, Temp, OpVT);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETNE:  // X != Y  -->  (X^Y)
      N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
      break;
    case ISD::SETGT:  // X >s Y  -->  X == 0 & Y == 1  -->  ~X & Y
    case ISD::SETULT: // X <u Y  -->  X == 0 & Y == 1  -->  ~X & Y
      Temp = DAG.getNOT(dl, N0, OpVT);
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETLT:  // X <s Y  --> X == 1 & Y == 0  -->  ~Y & X
    case ISD::SETUGT: // X >u Y  --> X == 1 & Y == 0  -->  ~Y & X
      Temp = DAG.getNOT(dl, N1, OpVT);
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
      Temp = DAG.getNOT(dl, N0, OpVT);
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
      Temp = DAG.getNOT(dl, N1, OpVT);
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
      break;
    }
    if (VT.getScalarType() != MVT::i1) {
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(N0.getNode());
      // FIXME: If running after legalize, we probably can't do this.
      ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
      N0 = DAG.getNode(ExtendCode, dl, VT, N0);
    }
    return N0;
  }

  // Could not fold it.
  return SDValue();
}
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
                                    int64_t &Offset) const {
  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}

SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
//===----------------------------------------------------------------------===//
//  Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
TargetLowering::getConstraintType(StringRef Constraint) const {
  unsigned S = Constraint.size();

  if (S == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'r':
      return C_RegisterClass;
    case 'm': // memory
    case 'o': // offsetable
    case 'V': // not offsetable
      return C_Memory;
    case 'p': // Address.
      return C_Address;
    case 'n': // Simple Integer
    case 'E': // Floating Point Constant
    case 'F': // Floating Point Constant
      return C_Immediate;
    case 'i': // Simple Integer or Relocatable Constant
    case 's': // Relocatable Constant
    case 'X': // Allow ANY value.
    case 'I': // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;
    }
  }

  if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
    if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
      return C_Memory;
    return C_Register;
  }
  return C_Unknown;
}
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
  if (ConstraintVT.isInteger())
    return "r";
  if (ConstraintVT.isFloatingPoint())
    return "f"; // works for many targets
  return nullptr;
}

SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  return SDValue();
}
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X': // Allows any operand
  case 'i': // Simple Integer or Relocatable Constant
  case 'n': // Simple Integer
  case 's': { // Relocatable Constant
    ConstantSDNode *C;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementptr is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be
    // accessible while in this case the GA may be furthest from the root
    // node which is likely an ISD::ADD.
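    // For example, (add (add GA, 4), 8) accumulates Offset = 12 below and
    // emits a single target global address with the offset folded in.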
    while (true) {
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }

      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }

      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }

      return;
    }
    break;
  }
  }
}
void TargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
                                                    SmallVectorImpl<SDValue> &Ops,
                                                    SelectionDAG &DAG) const {
  return;
}

std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
                                             StringRef Constraint,
                                             MVT VT) const {
  if (Constraint.empty() || Constraint[0] != '{')
    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
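  // e.g. a "{ax}" constraint leaves RegName == "ax"; it is matched below
  // case-insensitively against each physical register's assembly name.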
  std::pair<unsigned, const TargetRegisterClass *> R =
      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));

  // Figure out which register class contains this reg.
  for (const TargetRegisterClass *RC : RI->regclasses()) {
    // If none of the value types for this register class are valid, we
    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
    if (!isLegalRC(*RI, *RC))
      continue;

    for (const MCPhysReg &PR : *RC) {
      if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
        std::pair<unsigned, const TargetRegisterClass *> S =
            std::make_pair(PR, RC);

        // If this register class has the requested value type, return it,
        // otherwise keep searching and return the first class found
        // if no other is found which explicitly has the requested type.
        if (RI->isTypeLegalForClass(*RC, VT))
          return S;
        if (!R.second)
          R = S;
      }
    }
  }

  return R;
}
//===----------------------------------------------------------------------===//
// Constraint Selection.
//===----------------------------------------------------------------------===//

/// Return true if this is an input operand that is a matching constraint
/// like "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}

/// If this is an input matching constraint, this method returns the output
/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return atoi(ConstraintCode.c_str());
}
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up
  // the ConstraintOperands list.
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0;   // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
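      // e.g. a 64-bit struct is tiled with an i64 below, while a 12-byte
      // struct matches no integer width here and is left unchanged.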
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }
  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }
  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
  switch (CT) {
  case TargetLowering::C_Immediate:
  case TargetLowering::C_Other:
  case TargetLowering::C_Unknown:
    return 0;
  case TargetLowering::C_Register:
    return 1;
  case TargetLowering::C_RegisterClass:
    return 2;
  case TargetLowering::C_Memory:
  case TargetLowering::C_Address:
    return 3;
  }
  llvm_unreachable("Invalid constraint type");
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getMultipleConstraintMatchWeight(
    AsmOperandInfo &info, int maIndex) const {
  InlineAsm::ConstraintCodeVector *rCodes;
  if (maIndex >= (int)info.multipleAlternatives.size())
    rCodes = &info.Codes;
  else
    rCodes = &info.multipleAlternatives[maIndex].Codes;
  ConstraintWeight BestWeight = CW_Invalid;

  // Loop over the options, keeping track of the most general one.
  for (const std::string &rCode : *rCodes) {
    ConstraintWeight weight =
        getSingleConstraintMatchWeight(info, rCode.c_str());
    if (weight > BestWeight)
      BestWeight = weight;
  }

  return BestWeight;
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  case 'i': // immediate integer.
  case 'n': // immediate integer with a known value.
    if (isa<ConstantInt>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 's': // non-explicit integral immediate.
    if (isa<GlobalValue>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 'E': // immediate float if host format.
  case 'F': // immediate float.
    if (isa<ConstantFP>(CallOperandVal))
      weight = CW_Constant;
    break;
  case '<': // memory operand with autodecrement.
  case '>': // memory operand with autoincrement.
  case 'm': // memory operand.
  case 'o': // offsettable memory operand
  case 'V': // non-offsettable memory operand
    weight = CW_Memory;
    break;
  case 'r': // general register.
  case 'g': // general register, memory operand or immediate integer.
            // note: Clang converts "g" to "imr".
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;
  case 'X': // any operand.
  default:
    weight = CW_Default;
    break;
  }
  return weight;
}
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
///    Other         -> immediates and magic values
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it.  The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory.  Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it.  This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present.  This prefers
///     'm' over 'r', for example.
///
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
                             const TargetLowering &TLI,
                             SDValue Op, SelectionDAG *DAG) {
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  unsigned BestIdx = 0;
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  int BestGenerality = -1;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
    TargetLowering::ConstraintType CType =
        TLI.getConstraintType(OpInfo.Codes[i]);

    // Indirect 'other' or 'immediate' constraints are not allowed.
    if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
                               CType == TargetLowering::C_Register ||
                               CType == TargetLowering::C_RegisterClass))
      continue;

    // If this is an 'other' or 'immediate' constraint, see if the operand is
    // valid for it. For example, on X86 we might have an 'rI' constraint. If
    // the operand is an integer in the range [0..31] we want to use I (saving a
    // load of a register), otherwise we must use 'r'.
    if ((CType == TargetLowering::C_Other ||
         CType == TargetLowering::C_Immediate) && Op.getNode()) {
      assert(OpInfo.Codes[i].size() == 1 &&
             "Unhandled multi-letter 'other' constraint");
      std::vector<SDValue> ResultOps;
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
                                       ResultOps, *DAG);
      if (!ResultOps.empty()) {
        BestType = CType;
        BestIdx = i;
        break;
      }
    }

    // Things with matching constraints can only be registers, per gcc
    // documentation.  This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    // This constraint letter is more general than the previous one, use it.
    int Generality = getConstraintGenerality(CType);
    if (Generality > BestGenerality) {
      BestType = CType;
      BestIdx = i;
      BestGenerality = Generality;
    }
  }

  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  OpInfo.ConstraintType = BestType;
}
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    ChooseConstraint(OpInfo, *this, Op, DAG);
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;
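  // Decompose each divisor as D = D' * 2^S with D' odd: an exact divide by D
  // is an exact arithmetic shift right by S followed by a multiply with the
  // multiplicative inverse of D' modulo 2^BitWidth (every odd value is
  // invertible there).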
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    APInt t;
    APInt Factor = Divisor;
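    // Seeding Factor with the (odd) divisor gives three correct low bits,
    // since D*D == 1 (mod 8) for odd D; each Newton step
    // Factor *= 2 - Divisor*Factor then doubles the number of correct bits.
    // e.g. for D = 3 at 8 bits the inverse is 171: 3 * 171 = 513 == 1
    // (mod 256).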
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;

    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                      SelectionDAG &DAG,
                                      SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV
  return SDValue();
}

SDValue
TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
                              SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SREM as SREM
  return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);
  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    SignedDivisionByConstantInfo magics =
        SignedDivisionByConstantInfo::get(Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }
  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    return SDValue();
  };
  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
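
// Illustrative note (not part of the lowering): for an i32 sdiv by 3 the
// sequence built above degenerates to the classic Hacker's Delight form,
// roughly:
//   Q = mulhs(N, 0x55555556)   // magic for d = 3; NumeratorFactor = 0
//   Q = sra(Q, 0)              // ShiftAmount = 0 for d = 3
//   Q = add(Q, srl(Q, 31))     // add the sign bit to round toward zero
// The divide-by-3 constants here are the well-known ones; every other divisor
// gets its values from SignedDivisionByConstantInfo.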

/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  // TODO: Support vectors.
  unsigned LeadingZeros = 0;
  if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
    assert(!isOneConstant(N1) && "Unexpected divisor");
    LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
    // UnsignedDivisionByConstantInfo doesn't work correctly if the leading
    // zeros in the dividend exceed the leading zeros of the divisor.
    LeadingZeros =
        std::min(LeadingZeros,
                 cast<ConstantSDNode>(N1)->getAPIntValue().countLeadingZeros());
  }
  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    const APInt &Divisor = C->getAPIntValue();

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      PreShift = PostShift = DAG.getUNDEF(ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
    } else {
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);

      MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
      PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
      NPQFactor = DAG.getConstant(
          magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
                       : APInt::getZero(EltBits),
          dl, SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(PreShift);
    MagicFactors.push_back(MagicFactor);
    NPQFactors.push_back(NPQFactor);
    PostShifts.push_back(PostShift);
    return true;
  };
  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }
  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
    Created.push_back(Q.getNode());
  }

  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    return SDValue(); // No mulhu or equivalent
  };
  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }
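
  // Illustrative note (not part of the lowering): the NPQ fixup (NPQ is the
  // N-minus-Q correction term) is Hacker's Delight's way of handling magic
  // constants that don't fit in the element width. E.g. for an i32 udiv by 7
  // the scalar path should emit roughly:
  //   Q   = mulhu(N, 0x24924925)   // truncated magic; IsAdd is set
  //   NPQ = srl(sub(N, Q), 1)
  //   Q   = srl(add(NPQ, Q), 2)    // PostShift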
  if (UsePostShift) {
    Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
    Created.push_back(Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
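
// Illustrative note (not part of the lowering): for an i32 udiv by 3 no NPQ
// fixup is needed and the result is simply
//   Q = srl(mulhu(N, 0xAAAAAAAB), 1)
// because 3 * 0xAAAAAAAB == 2^33 + 1, so mulhu followed by a shift computes
// floor(N * (2^33 + 1) / (3 * 2^33)) == floor(N / 3) for every 32-bit N.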

/// If all values in Values that *don't* match the predicate are the same
/// 'splat' value, then replace all values with that splat value.
/// Else, if AlternativeReplacement was provided, then replace all values that
/// do match predicate with AlternativeReplacement value.
static void
turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
                          std::function<bool(SDValue)> Predicate,
                          SDValue AlternativeReplacement = SDValue()) {
  SDValue Replacement;
  // Is there a value for which the Predicate does *NOT* match? What is it?
  auto SplatValue = llvm::find_if_not(Values, Predicate);
  if (SplatValue != Values.end()) {
    // Does Values consist only of SplatValue's and values matching Predicate?
    if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
          return Value == *SplatValue || Predicate(Value);
        })) // Then we shall replace values matching predicate with SplatValue.
      Replacement = *SplatValue;
  }
  if (!Replacement) {
    // Oops, we did not find the "baseline" splat value.
    if (!AlternativeReplacement)
      return; // Nothing to do.
    // Let's replace with provided value then.
    Replacement = AlternativeReplacement;
  }
  std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
}
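
// Illustrative note: with Predicate = isNullConstant,
//   [C, 0, C, 0] becomes [C, C, C, C]   (a splat value C was found), while
//   [C, 0, D, 0] is left untouched unless AlternativeReplacement is given,
//   in which case the zero lanes become AlternativeReplacement.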

/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                        SDValue CompTargetNode,
                                        ISD::CondCode Cond,
                                        DAGCombinerInfo &DCI,
                                        const SDLoc &DL) const {
  SmallVector<SDNode *, 5> Built;
  if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
                                         DCI, DL, Built)) {
    for (SDNode *N : Built)
      DCI.AddToWorklist(N);
    return Folded;
  }
  return SDValue();
}

SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
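  //
  // Illustrative note (not part of the fold): with W = 32,
  //   (x % 5 == 0)  becomes  (x * 0xCCCCCCCD) u<= 0x33333333
  //     since 5 * 0xCCCCCCCD == 2^34 + 1 (so P inverts 5 mod 2^32) and
  //     Q = floor((2^32 - 1) / 5) = 0x33333333;
  //   (x % 12 == 0) becomes  rotr(x * 0xAAAAAAAB, 2) u<= 0x15555555
  //     with D0 = 3, K = 2, P = inv(3) = 0xAAAAAAAB and
  //     Q = floor((2^32 - 1) / 12) = 0x15555555.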
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isZero())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isZero();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isZero())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(APInt::getAllOnes(W), D, Q, R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K to bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that the comparison constant is tautological,
      // i.e. it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };
  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a power-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0's.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTORs");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }
  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, NewCC will give an
  // always-true result for them, so we need to fix up those lanes.
  // Or the other way around for the inequality predicate.

  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops, as legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the note above the VSELECT check above.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}

/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                        SDValue CompTargetNode,
                                        ISD::CondCode Cond,
                                        DAGCombinerInfo &DCI,
                                        const SDLoc &DL) const {
  SmallVector<SDNode *, 7> Built;
  if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
                                         DCI, DL, Built)) {
    assert(Built.size() <= 7 && "Max size prediction failed.");
    for (SDNode *N : Built)
      DCI.AddToWorklist(N);
    return Folded;
  }
  return SDValue();
}

SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
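  //
  // Illustrative note (not part of the fold): with W = 32 and D = 3 we get
  // D0 = 3, K = 0, P = 0xAAAAAAAB (since 3 * 0xAAAAAAAB == 2^33 + 1),
  // A = floor((2^31 - 1) / 3) = 0x2AAAAAAA and Q = 2 * A = 0x55555554, so
  //   (x s% 3 == 0)  becomes  (x * 0xAAAAAAAB + 0x2AAAAAAA) u<= 0x55555554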
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If we are after ops legalization, and MUL is unavailable, we cannot
  // proceed.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isZero())
    return SDValue();

  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isZero())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`

    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOne();
    AllDivisorsAreOnes &= D.isOne();

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If the divisor is INT_MIN, then we don't care about this lane in this
      // fold, we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };
  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, AVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0's.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
           QAmts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    assert(isa<ConstantSDNode>(D) && "Expected a constant");
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }
  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();
    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors, which effectively means it
  // is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix up the results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops, as legalization has a hard time producing good code for the code that
  // follows.
  if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isOperationLegalOrCustom(Cond, VT) ||
      !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce the final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
                                MaskedIsZero, Fold);
  return Blended;
}

bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_return_address' must "
                                "be a constant integer");
    return true;
  }
  return false;
}

SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
                                         const DenormalMode &Mode) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);

  // Test against denormal inputs to avoid a wrong estimate.
  if (Mode.Input == DenormalMode::IEEE) {
    // This is specifically a check for the handling of denormal inputs,
    // not the result.

    // Test = fabs(X) < SmallestNormal
    const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
    APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
    SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
    SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
    return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
  }

  // Test = X == 0.0
  return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}

SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target
    // says the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even if it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate X if its cost is less than or equal to the cost of Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate X if its cost is less than or equal to the cost of Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if we fail to negate Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate X if its cost is less than or equal to the cost of Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  case ISD::SELECT:
  case ISD::VSELECT: {
    // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
    // iff at least one cost is cheaper and the other is neutral/cheaper
    SDValue LHS = Op.getOperand(1);
    NegatibleCost CostLHS = NegatibleCost::Expensive;
    SDValue NegLHS =
        getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
    if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
      RemoveDeadNode(NegLHS);
      break;
    }

    // Prevent this node from being deleted by the next call.
    Handles.emplace_back(NegLHS);

    SDValue RHS = Op.getOperand(2);
    NegatibleCost CostRHS = NegatibleCost::Expensive;
    SDValue NegRHS =
        getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);

    // We're done with the handles.
    Handles.clear();

    if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
        (CostLHS != NegatibleCost::Cheaper &&
         CostRHS != NegatibleCost::Cheaper)) {
      RemoveDeadNode(NegLHS);
      RemoveDeadNode(NegRHS);
      break;
    }

    Cost = std::min(CostLHS, CostRHS);
    return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
  }
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
// Legalization Utilities
//===----------------------------------------------------------------------===//

bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);

  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }
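
  // Illustrative note (not part of the expansion): what follows is the
  // schoolbook decomposition of the full double-width product from
  // InnerBitSize-wide halves. Writing h = InnerBitSize:
  //   (LH*2^h + LL) * (RH*2^h + RL)
  //     = LL*RL + (LL*RH + LH*RL)*2^h + LH*RH*2^(2*h)
  // The carries out of the middle column are what the ADDC/ADDE or ADDCARRY
  // plumbing below propagates into the high half.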
  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}

bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
                               SelectionDAG &DAG, MulExpansionKind Kind,
                               SDValue LL, SDValue LH, SDValue RL,
                               SDValue RH) const {
  SmallVector<SDValue, 2> Result;
  bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
                           N->getOperand(0), N->getOperand(1), Result, HiLoVT,
                           DAG, Kind, LL, LH, RL, RH);
  if (Ok) {
    assert(Result.size() == 2);
    Lo = Result[0];
    Hi = Result[1];
  }
  return Ok;
}

// Optimize unsigned division or remainder by constants for types twice as
// large as a legal VT.
//
// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
// can be computed as:
//   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
//   Remainder = Sum % Constant
// This is based on "Remainder by Summing Digits" from Hacker's Delight.
//
// For division, we can compute the remainder using the algorithm described
// above, then subtract it from the dividend to get an exact multiple of
// Constant. Then multiply that exact multiple by the multiplicative inverse
// modulo (1 << BitWidth) to get the quotient.
//
// If Constant is even, we can shift right the dividend and the divisor by the
// number of trailing zeros in Constant before applying the remainder algorithm.
// If we're after the quotient, we can subtract this value from the shifted
// dividend and multiply by the multiplicative inverse of the shifted divisor.
// If we want the remainder, we shift the value left by the number of trailing
// zeros and add the bits that were shifted out of the dividend.
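//
// Illustrative note (not part of the expansion): for an i64 urem by 3 split
// into i32 halves, 2^32 % 3 == 1, so
//   (Hi * 2^32 + Lo) % 3 == (Hi + Lo + carry) % 3
// and the whole i64 remainder reduces to an i32 urem of the digit sum.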
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
                                            SmallVectorImpl<SDValue> &Result,
                                            EVT HiLoVT, SelectionDAG &DAG,
                                            SDValue LL, SDValue LH) const {
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);

  // TODO: Support signed division/remainder.
  if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
    return false;
  assert(
      (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
      "Unexpected opcode");

  auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CN)
    return false;

  APInt Divisor = CN->getAPIntValue();
  unsigned BitWidth = Divisor.getBitWidth();
  unsigned HBitWidth = BitWidth / 2;
  assert(VT.getScalarSizeInBits() == BitWidth &&
         HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");

  // The divisor needs to be less than (1 << HBitWidth).
  APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
  if (Divisor.uge(HalfMaxPlus1))
    return false;

  // We depend on the UREM by constant optimization in DAGCombiner that requires
  // high multiply.
  if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
      !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
    return false;

  // Don't expand if optimizing for size.
  if (DAG.shouldOptForSize())
    return false;

  // Early out for 0 or 1 divisors.
  if (Divisor.ule(1))
    return false;

  // If the divisor is even, shift it until it becomes odd.
  unsigned TrailingZeros = 0;
  if (!Divisor[0]) {
    TrailingZeros = Divisor.countTrailingZeros();
    Divisor.lshrInPlace(TrailingZeros);
  }
  SDLoc dl(N);
  SDValue Sum;
  SDValue PartialRem;

  // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
  // then add in the carry.
  // TODO: If we can't split it in half, we might be able to split into 3 or
  // more pieces using a smaller bit width.
  if (HalfMaxPlus1.urem(Divisor).isOneValue()) {
    assert(!LL == !LH && "Expected both input halves or no input halves!");
    if (!LL) {
      LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
                       DAG.getIntPtrConstant(0, dl));
      LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
                       DAG.getIntPtrConstant(1, dl));
    }

    // Shift the input by the number of TrailingZeros in the divisor. The
    // shifted out bits will be added to the remainder later.
    if (TrailingZeros) {
      // Save the shifted off bits if we need the remainder.
      if (Opcode != ISD::UDIV) {
        APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
        PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
                                 DAG.getConstant(Mask, dl, HiLoVT));
      }

      LL = DAG.getNode(
          ISD::OR, dl, HiLoVT,
          DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
                      DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
          DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
                      DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
                                                 HiLoVT, dl)));
      LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
    }

    // Use addcarry if we can, otherwise use a compare to detect overflow.
    EVT SetCCType =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
    if (isOperationLegalOrCustom(ISD::ADDCARRY, HiLoVT)) {
      SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
      Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
      Sum = DAG.getNode(ISD::ADDCARRY, dl, VTList, Sum,
                        DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
    } else {
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
      SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
      // If the boolean for the target is 0 or 1, we can add the setcc result
      // directly.
      if (getBooleanContents(HiLoVT) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
        Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
      else
        Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
                              DAG.getConstant(0, dl, HiLoVT));
      Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
    }
  }
  // If we didn't find a sum, we can't do the expansion.
  if (!Sum)
    return false;

  // Perform a HiLoVT urem on the Sum using truncated divisor.
  SDValue RemL =
      DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
                  DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
  SDValue RemH = DAG.getConstant(0, dl, HiLoVT);

  if (Opcode != ISD::UREM) {
    // Subtract the remainder from the shifted dividend.
    SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
    SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
    Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);

    // Multiply by the multiplicative inverse of the divisor modulo
    // (1 << BitWidth).
    APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
    APInt MulFactor = Divisor.zext(BitWidth + 1);
    MulFactor = MulFactor.multiplicativeInverse(Mod);
    MulFactor = MulFactor.trunc(BitWidth);
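    // Worked example (illustrative values only): with BitWidth == 32 and an
    // odd divisor of 5, MulFactor is 0xCCCCCCCD, since
    // 5 * 0xCCCCCCCD == 2^34 + 1 == 1 (mod 2^32). Dividend - Rem is an exact
    // multiple of 5, so multiplying it by 0xCCCCCCCD modulo 2^32 recovers the
    // exact quotient without needing a wide division.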
    SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
                                   DAG.getConstant(MulFactor, dl, VT));

    // Split the quotient into low and high parts.
    SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
                                DAG.getIntPtrConstant(0, dl));
    SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
                                DAG.getIntPtrConstant(1, dl));
    Result.push_back(QuotL);
    Result.push_back(QuotH);
  }

  if (Opcode != ISD::UDIV) {
    // If we shifted the input, shift the remainder left and add the bits we
    // shifted off the input.
    if (TrailingZeros) {
      RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
                         DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
      RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
    }
    Result.push_back(RemL);
    Result.push_back(DAG.getConstant(0, dl, HiLoVT));
  }

  return true;
}
// Check that (every element of) Z is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
  return ISD::matchUnaryPredicate(
      Z,
      [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
      true);
}
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(0);

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);
  SDValue Mask = Node->getOperand(3);
  SDValue VL = Node->getOperand(4);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
    InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
    ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
                      VL);
    ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
                      VL);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
                                 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
      InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
      SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL);
      ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
      ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
      ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL);
    }
  }
  return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
}
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
                                          SelectionDAG &DAG) const {
  if (Node->isVPOpcode())
    return expandVPFunnelShift(Node, DAG);

  EVT VT = Node->getValueType(0);
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
      // Note: the negated shift amount is computed in the shift-amount type.
      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      Z = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Z);
    } else {
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      SDValue One = DAG.getConstant(1, DL, ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
      } else {
        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
      }
      Z = DAG.getNOT(DL, Z, ShVT);
    }
    return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
  }

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
    }
  }
  return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}

// TODO: Merge with expandFunnelShift.
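// Rotate semantics, for reference: rotl(X, C) == (X << (C % W)) |
// (X >> ((W - C) % W)) over a W-bit type, so e.g. rotl i8 0b10010110, 3 ==
// 0b10110100. A rotate by C in one direction equals a rotate by W - C (i.e.
// the negated amount modulo W) in the other, which is what the
// reversed-opcode path below relies on.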
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                  SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, ShVT);

  // If a rotate in the other direction is more supported, use it.
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    return DAG.getNode(RevRot, DL, VT, Op0, Sub);
  }

  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SHL, VT) ||
       !isOperationLegalOrCustom(ISD::SRL, VT) ||
       !isOperationLegalOrCustom(ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return SDValue();

  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
    SDValue One = DAG.getConstant(1, DL, ShVT);
    HsVal =
        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
  }
  return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/SRL nodes don't. Insert an AND to be safe; it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));

  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is greater than or equal to the width of a part we
  // don't use the result from the FSHL/FSHR. Insert a test and select the
  // appropriate values for large shift amounts.
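  // For example (illustrative values only), an SHL_PARTS of a 64-bit value
  // split into two 32-bit parts by 40 behaves like one 64-bit shift: the high
  // part becomes Lo << 8 and the low part becomes 0, which is exactly what
  // the selects below produce once the test detects (ShAmt & 32) != 0.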
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
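  // Worked example (illustrative value only): Src == 4.0f has bit pattern
  // 0x40800000, so ExponentBits == 129, Exponent == 129 - 127 == 2, and the
  // mantissa with the implicit bit restored is R == 0x00800000. Since
  // Exponent <= 23, the select below yields R >> (23 - 2) == 4, and with a
  // clear sign bit the final xor/sub leaves the result as 4.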
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller than the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't a cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on the maximum range of FP_TO_SINT; if the value exceeds
    // the signmask, offset it first (the result of which should be fully
    // representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
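    // Concretely (illustrative values only), for an f64 -> u64 conversion Cst
    // is 2^63: inputs below 2^63 convert directly, while larger inputs are
    // biased down by 2^63 before the signed conversion and the integer result
    // has its top bit restored by the xor with IntOfs.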
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False
    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when the rounding mode is
  // set to round toward negative infinity, since it would produce -0.0. So
  // disable under strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt. This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will produce
  // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
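  // The bit-pattern trick below: OR'ing the low 32 bits into the mantissa of
  // the double constant 2^52 (0x4330...) yields exactly 2^52 + Lo, and OR'ing
  // the high 32 bits into 2^84 (0x4530...) yields 2^84 + Hi * 2^32, since
  // those mantissa bits sit 32 positions above the 2^52 scale. Subtracting
  // the combined constant 2^84 + 2^52 and adding the two halves therefore
  // leaves Hi * 2^32 + Lo, rounded only once by the final fadd.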
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  SDValue HiSub =
      DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  return true;
}
SDValue
TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
                                               SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
          Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
         "Wrong opcode");

  if (Node->getFlags().hasNoNaNs()) {
    ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
    SDValue Op1 = Node->getOperand(0);
    SDValue Op2 = Node->getOperand(1);
    SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
    // Copy FMF flags, but always set the no-signed-zeros flag
    // as this is implied by the FMINNUM/FMAXNUM semantics.
    SDNodeFlags Flags = Node->getFlags();
    Flags.setNoSignedZeros(true);
    SelCC->setFlags(Flags);
    return SelCC;
  }

  return SDValue();
}
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
    ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
  EVT VT = Node->getValueType(0);

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs.
  if (Node->getFlags().hasNoNaNs()) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
    }
  }

  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
                                         unsigned Test, SDNodeFlags Flags,
                                         const SDLoc &DL,
                                         SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());

  // Degenerate cases.
  if (Test == 0)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if ((Test & fcAllFlags) == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part
  // determines the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Some checks may be represented as the inversion of a simpler check, for
  // example "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;
  if (unsigned InvertedCheck = getInvertedFPClassTest(Test)) {
    IsInverted = true;
    Test = InvertedCheck;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  if (Flags.hasNoFPExcept() &&
      isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
    if (Test == fcZero)
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          IsInverted ? ISD::SETUNE : ISD::SETOEQ);
    if (Test == fcNan)
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInverted ? ISD::SETO : ISD::SETUO);
  }
  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnesValue(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  SDValue Res;
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0, DL, IntVT), ISD::SETLT);
  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  // Check for individual classes.
  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // For such values (exp(V) == 0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
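    // The subtract-one trick works because subnormals are exactly the values
    // whose integer representation (sign cleared) lies in [1, AllOneMantissa];
    // subtracting 1 maps that range to [0, AllOneMantissa - 1] and maps zero
    // to the unsigned maximum, so a single unsigned compare covers both the
    // lower and upper bounds.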
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
// Only expand vector types if we have the appropriate vector bit operations.
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
  assert(VT.isVector() && "Expected vector type");
  unsigned Len = VT.getScalarSizeInBits();
  return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
         TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
         TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
         (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
         TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
}

SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(ISD::AND, dl, VT,
                       DAG.getNode(ISD::ADD, dl, VT, Op,
                                   DAG.getNode(ISD::SRL, dl, VT, Op,
                                               DAG.getConstant(8, dl, ShVT))),
                       DAG.getConstant(0xFF, dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
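  // At this point each byte of v holds that byte's popcount (0..8), so the
  // multiply by 0x01010101... accumulates the sum of all byte counts into the
  // most significant byte; e.g. for Len == 32, bits-set(0xDEADBEEF) ends up
  // as 24 in the top byte before the final shift extracts it.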
  SDValue Mask01 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
  return DAG.getNode(ISD::SRL, dl, VT,
                     DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                     DAG.getConstant(Len - 8, dl, ShVT));
}
SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1);
  SDValue VL = Node->getOperand(2);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // This is the same algorithm as expandCTPOP, from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;

  // v = v - ((v >> 1) & 0x55555555...)
  Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
                     DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
                                 DAG.getConstant(1, dl, ShVT), Mask, VL),
                     Mask55, Mask, VL);
  Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);

  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
  Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
                     DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
                                 DAG.getConstant(2, dl, ShVT), Mask, VL),
                     Mask33, Mask, VL);
  Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);

  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
                     Mask, VL);
  Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
  Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);

  if (Len <= 8)
    return Op;

  // v = (v * 0x01010101...) >> (Len - 8)
  SDValue Mask01 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
  return DAG.getNode(ISD::VP_LSHR, dl, VT,
                     DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
                     DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
}
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTLZ, VT))
    return DAG.getNode(ISD::CTLZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
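  // For example, for an i8 input 0b00010110 the or-smearing produces
  // 0b00011111; its complement 0b11100000 has exactly the leading-zero bits
  // of the original value set, so popcount(~x) == 3 == ctlz(0b00010110).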
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  Op = DAG.getNOT(dl, Op, VT);
  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}

SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1);
  SDValue VL = Node->getOperand(2);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
                     DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask,
                     VL);
  }
  Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
                   VL);
  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
}
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();

  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
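  // How the De Bruijn lookup works: Op & -Op isolates the lowest set bit,
  // i.e. a power of two 1 << i, so the multiply below is just a left shift of
  // the De Bruijn constant by i. Because every window of log2(BitWidth)
  // consecutive bits of the constant is unique, the top bits after the shift
  // identify i, and the table maps them back to the trailing-zero count.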
  const DataLayout &TD = DAG.getDataLayout();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
  SDValue Lookup = DAG.getNode(
      ISD::SRL, DL, VT,
      DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
                  DAG.getConstant(DeBruijn, DL, VT)),
      DAG.getConstant(ShiftAmt, DL, VT));
  Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));

  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(i);
    APInt Lshr = Shl.lshr(ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantDataArray in the constant pool.
  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
                                      TD.getPrefTypeAlign(CA->getType()));
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
                                   DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
                                   PtrInfo, MVT::i8);

  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
  return DAG.getSelect(DL, VT, SrcIsZero,
                       DAG.getConstant(BitWidth, DL, VT), ExtLoad);
}
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT))
    return DAG.getNode(ISD::CTTZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
  if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
      !isOperationLegal(ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
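  // For example, for an i8 input x = 0b01101000: x - 1 == 0b01100111 and
  // ~x == 0b10010111, so ~x & (x - 1) == 0b00000111, a mask of exactly the
  // trailing-zero positions, and its popcount gives cttz(x) == 3.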
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
  }
  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1);
  SDValue VL = Node->getOperand(2);
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);

  // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
  SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
                            DAG.getConstant(-1, dl, VT), Mask, VL);
  SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
                                 DAG.getConstant(1, dl, VT), Mask, VL);
  SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
}
SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
                                  bool IsNegative) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = N->getOperand(0);

  // abs(x) -> smax(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::SMAX, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    return DAG.getNode(ISD::SMAX, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // abs(x) -> umin(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::UMIN, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    Op = DAG.getFreeze(Op);
    return DAG.getNode(ISD::UMIN, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // 0 - abs(x) -> smin(x, sub(0,x))
  if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::SMIN, VT)) {
    Op = DAG.getFreeze(Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    return DAG.getNode(ISD::SMIN, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // Only expand vector types if we have the appropriate vector operations.
  if (VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SRA, VT) ||
       (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
       (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
       !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();
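  // Sign-splat trick: Shift is 0 for non-negative x and all-ones for negative
  // x, so xor(x, Shift) conditionally complements x, and the final sub adds
  // back the 1. E.g. for i8 x == -5 (0xFB): Shift == 0xFF, Xor == 0x04, and
  // Xor - Shift == 0x04 - (-1) == 5.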
  Op = DAG.getFreeze(Op);
  SDValue Shift =
      DAG.getNode(ISD::SRA, dl, VT, Op,
                  DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);

  // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
  if (!IsNegative)
    return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);

  // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
  return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
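    // Byte movement for i32, e.g. 0xAABBCCDD -> 0xDDCCBBAA: Tmp4 carries
    // byte 0 to byte 3, Tmp3 carries byte 1 to byte 2, Tmp2 carries byte 2
    // to byte 1, and Tmp1 carries byte 3 to byte 0 before the ORs combine
    // them.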
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(0xFF00, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL<<24, dl, VT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL<<16, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL<<8, dl, VT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
  case MVT::i32:
    Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
  case MVT::i64:
    Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
                       Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
                       Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
    Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
                       Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
    Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
    Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
    Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
    return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
  }
}
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();
  SDValue Tmp, Tmp2, Tmp3;
  // If we can, perform BSWAP first and then mask+swap the i4 halves, then the
  // i2 pairs, and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
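  // For example, bit-reversing the i8 value 0b10110010 proceeds as:
  //   swap i4 -> 0b00101011, swap i2 -> 0b10001110, swap i1 -> 0b01001101,
  // which is the fully reversed bit pattern.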
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }
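  // Otherwise reverse one bit at a time: for each I (with J = Sz - 1 - I),
  // shift bit I of Op into bit position J, isolate it with an AND, and OR it
  // into the accumulated result.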
  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz - 1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
    APInt Shift(Sz, 1);
    Shift <<= J;
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }
  return Tmp;
}

SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  assert(N->getOpcode() == ISD::VP_BITREVERSE);
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();
  SDValue Tmp, Tmp2, Tmp3;
  // If we can, perform BSWAP first and then mask+swap the i4 halves, then the
  // i2 pairs, and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask4, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp,
                       DAG.getConstant(Mask4, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask2, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp,
                       DAG.getConstant(Mask2, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
                       DAG.getConstant(Mask1, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp,
                       DAG.getConstant(Mask1, dl, VT), Mask, EVL);
    Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
                       Mask, EVL);
    Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
    return Tmp;
  }
  return SDValue();
}

std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(0);
  ISD::LoadExtType ExtType = LD->getExtensionType();
  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");
  unsigned NumElem = SrcVT.getVectorNumElements();
  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();
  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various bits of code depend on it, e.g. the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // byte-sized elements must therefore be stored as an integer built out of
  // the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
                                     LoadVT, SL, /*LegalTypes=*/false);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }
      Vals.push_back(Scalar);
    }
    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());
  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
  return std::make_pair(Value, NewChain);
}

SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);
  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();
  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");
  // The type of the data we want to save.
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();
  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();
  unsigned NumElem = StVT.getVectorNumElements();
  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various bits of code depend on it, e.g. the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // byte-sized elements must therefore be stored as an integer built out of
  // the extracted vector elements.
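  // For example, a v8i1 store becomes a single i8 store: each element is
  // truncated to i1, zero-extended into the integer, shifted into its bit
  // position, and OR'd into the accumulated value stored below.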
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }
    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }
  // Store stride in bytes.
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));
    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));
    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());
    Stores.push_back(Store);
  }
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}

std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();
  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }
      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND
                                                  : ISD::ANY_EXTEND,
                             dl, VT, Result);
      return std::make_pair(Result, newLoad.getValue(1));
    }
    // Copy the value to an (aligned) stack slot using (unaligned) integer
    // loads and stores, then do an (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;
    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();
    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    // Do all but one of the copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot. Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }
    // The last copy may be partial. Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot. Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);
    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }
  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");
  // Compute the new VT that is half the size of the old one. This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
  NumBits >>= 1;
  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();
  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;
  // Load the value in two parts.
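  // For example, an unaligned i32 load becomes two half-width i16 loads at
  // offsets 0 and 2; the low half is always zero-extended and the two halves
  // are recombined below as (Hi << 16) | Lo, with the offsets chosen by
  // endianness.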
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }
  // Aggregate the two parts.
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));
  return std::make_pair(Result, TF);
}

SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();
  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do an (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
    EVT StackPtrVT = StackPtr.getValueType();
    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;
    // Do all but one of the copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location. Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }
    // The last store may be partial. Do a truncating store. On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }
  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT.
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;
  // Divide the stored value in two parts.
  SDValue ShiftAmount = DAG.getConstant(
      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
  // Store the two parts.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());
  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());
  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}

SDValue
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
                                       const SDLoc &DL, EVT DataVT,
                                       SelectionDAG &DAG,
                                       bool IsCompressedMemory) const {
  SDValue Increment;
  EVT AddrVT = Addr.getValueType();
  EVT MaskVT = Mask.getValueType();
  assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
         "Incompatible types of Data and Mask");
  if (IsCompressedMemory) {
    if (DataVT.isScalableVector())
      report_fatal_error(
          "Cannot currently handle compressed memory with scalable vectors");
    // Increment the pointer according to the number of '1's in the mask.
    EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
    SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
    if (MaskIntVT.getSizeInBits() < 32) {
      MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
      MaskIntVT = MVT::i32;
    }
    // Count '1's with POPCNT.
    Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
    Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
    // Scale is the element size in bytes.
    SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
                                    AddrVT);
    Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
  } else if (DataVT.isScalableVector()) {
    Increment = DAG.getVScale(DL, AddrVT,
                              APInt(AddrVT.getFixedSizeInBits(),
                                    DataVT.getStoreSize().getKnownMinValue()));
  } else
    Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
  return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}

static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
                                       EVT VecVT, const SDLoc &dl,
                                       ElementCount SubEC) {
  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
         "Cannot index a scalable vector within a fixed-width vector");
  unsigned NElts = VecVT.getVectorMinNumElements();
  unsigned NumSubElts = SubEC.getKnownMinValue();
  EVT IdxVT = Idx.getValueType();
  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know that the value plus the number
    // of elements in the subvector minus one is less than the minimum number
    // of elements, then it's safe to return Idx.
    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
        return Idx;
    SDValue VS =
        DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
    SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
                              DAG.getConstant(NumSubElts, dl, IdxVT));
    return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
  }
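  // When extracting a single element from a power-of-two-sized vector, an AND
  // with NElts - 1 keeps the index within the vector at lower cost than the
  // UMIN clamp below; an out-of-range index wraps to some in-range lane,
  // which is acceptable since the result of an out-of-bounds dynamic index
  // is undefined anyway.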
  if (isPowerOf2_32(NElts) && NumSubElts == 1) {
    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
                       DAG.getConstant(Imm, dl, IdxVT));
  }
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
                     DAG.getConstant(MaxIndex, dl, IdxVT));
}

SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
                                                SDValue VecPtr, EVT VecVT,
                                                SDValue Index) const {
  return getVectorSubVecPointer(
      DAG, VecPtr, VecVT,
      EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
      Index);
}

SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
  EVT EltVT = VecVT.getVectorElementType();
  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                  SubVecVT.getVectorElementCount());
  EVT IdxVT = Index.getValueType();
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}

//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//

SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to the address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address(address of global variable named "__emutls_v.xyz")
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module *>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);
  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  // TLSADDR will be codegen'ed as a call. Inform MFI that this function has
  // calls. At least for X86 targets; maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);
  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}

SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
  if (!isCtlzFast())
    return SDValue();
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isZero() && CC == ISD::SETEQ) {
      EVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                                DAG.getConstant(Log2b, dl, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }
  }
  return SDValue();
}

SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);
  // umin(x,y) -> sub(x,usubsat(x,y))
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }
  // umax(x,y) -> add(x,usubsat(y,x))
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }
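  // Both identities follow from usubsat(a, b) == max(a - b, 0) in unsigned
  // arithmetic: x - max(x - y, 0) equals x when x <= y and y otherwise (the
  // minimum), while x + max(y - x, 0) equals x when y <= x and y otherwise
  // (the maximum).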
  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  ISD::CondCode CC;
  switch (Opcode) {
  default: llvm_unreachable("How did we get here?");
  case ISD::SMAX: CC = ISD::SETGT; break;
  case ISD::SMIN: CC = ISD::SETLT; break;
  case ISD::UMAX: CC = ISD::SETUGT; break;
  case ISD::UMIN: CC = ISD::SETULT; break;
  }
  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
  return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}

SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);
  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");
  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }
  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }
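  // The uadd.sat identity holds because ~b == UINT_MAX - b: clamping a to
  // that bound first means the add above can never wrap, and when a is
  // clamped the sum is exactly UINT_MAX, the saturated value.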
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }
  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);
  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result =
      DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }
  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }
  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}

SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  bool IsSigned = Opcode == ISD::SSHLSAT;
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);
  assert((Node->getOpcode() == ISD::SSHLSAT ||
          Node->getOpcode() == ISD::USHLSAT) &&
         "Expected a SHLSAT opcode");
  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);
  // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
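  // For instance, ushl.sat(i8 0x40, 2) computes 0x40 << 2 == 0x00 after
  // truncation; shifting back gives 0x00 != 0x40, so the result saturates
  // to the unsigned maximum 0xFF.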
  unsigned BW = VT.getScalarSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
  SDValue Orig =
      DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
  SDValue SatVal;
  if (IsSigned) {
    SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
    SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
    SDValue Cond =
        DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
    SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
  } else {
    SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
  }
  SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
  return DAG.getSelect(dl, VT, Cond, SatVal, Result);
}

SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();
  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);
      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs: if the resulting sign bit is 0, the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }
  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");
  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }
  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;
  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
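  // For example, with i8 operands at Scale 4 (Q4 fixed point), 1.5 * 2.5 is
  // 0x18 * 0x28 == 0x03C0; FSHR(Hi = 0x03, Lo = 0xC0, 4) extracts the middle
  // byte 0x3C, i.e. 3.75 in Q4, the correctly rescaled product.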
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;
  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.
    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);
    return Result;
  }
  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.
  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
  if (Scale == 0) {
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturate to SatMin if the wide product is negative, and to SatMax if
    // the wide product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }
  // We handled Scale == 0 above, so all the bits to examine are in Hi.
  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
  // which is the same as if (Hi < (-1 << (Scale - 1)))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}

SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");
  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
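  // For example, a Q4 unsigned division needs the quotient scaled up by 2^4:
  // if the LHS has at least 4 leading zero bits, or the RHS at least 4
  // trailing zero bits, or any mix adding up to 4, the shifted operands still
  // fit in VT and a plain division gives the scaled result.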
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
  // For signed saturating operations, we need to be able to detect true
  // integer division overflow; that is, when you have MIN / -EPS. However,
  // this is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;
  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getConstant(LHSShift, dl, ShiftTy));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getConstant(RHSShift, dl, ShiftTy));
  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) && isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT, LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT, LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT, LHS, RHS);
  return Quot;
}

void TargetLowering::expandUADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::UADDO;
  // If ADD/SUBCARRY is legal, use that instead.
  unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
    SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
    SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
                                    {LHS, RHS, CarryIn});
    Result = SDValue(NodeCarry.getNode(), 0);
    Overflow = SDValue(NodeCarry.getNode(), 1);
    return;
  }
  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);
  EVT ResultType = Node->getValueType(1);
  EVT SetCCType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
  SDValue SetCC;
  if (IsAdd && isOneConstant(RHS)) {
    // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially
    // reduces the live range of X. We assume comparing with 0 is cheap.
    // The general case (X + C) < C is not necessarily beneficial. Although we
    // reduce the live range of X, we may introduce the materialization of
    // constant C.
    SetCC =
        DAG.getSetCC(dl, SetCCType, Result,
                     DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
  } else {
    ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
    SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
  }
  Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}

void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;
  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);
  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegal(OpcSat, LHS.getValueType())) {
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }
  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
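  // For example, the i8 addition 100 + 100 wraps to -56: the result is less
  // than LHS while RHS is non-negative, so the XOR below signals overflow.
  // Adding -1 instead gives 99, also less than LHS, but there RHS is
  // negative, so the XOR correctly signals no overflow.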
  8845. SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
  8846. SDValue ConditionRHS =
  8847. DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
  8848. Overflow = DAG.getBoolExtOrTrunc(
  8849. DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
  8850. ResultType, ResultType);
  8851. }

bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is the same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
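      // Illustration (i8, umulo X, 8): X = 40 shifts to 64, and shifting back
      // gives 64 >> 3 == 8 != 40, exposing the overflow.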
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      Overflow = DAG.getSetCC(dl, SetCCVT,
                              DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                          dl, VT, Result, ShiftAmt),
                              LHS, ISD::SETNE);
      return true;
    }
  }

  EVT WideVT =
      EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getConstant(VT.getScalarSizeInBits(), dl,
                        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of the
      // low part.
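      // e.g. for i32, HiLHS = LHS >> 31 is all-zeros for a non-negative LHS
      // and all-ones for a negative one, i.e. the sign extension of LHS into
      // the high word.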
      unsigned LoSize = VT.getFixedSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
      HiLHS = DAG.getConstant(0, dl, VT);
      HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus having to be split
    // into two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
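    // For a signed multiply, the product fits iff the top half is a pure sign
    // extension of the bottom half, i.e. TopHalf == BottomHalf >> (width - 1).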
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}

SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  // Try to use a shuffle reduction for power of two vectors.
  if (VT.isPow2VectorType()) {
    while (VT.getVectorNumElements() > 1) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
        break;

      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
      VT = HalfVT;
    }
  }
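  // Illustration: a v8i32 vecreduce_add is halved to a v4i32 add, then a
  // v2i32 add, for as long as the halved op stays legal; any remaining lanes
  // are scalarized below.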
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);

  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(0))
    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  return Res;
}

SDValue TargetLowering::expandVecReduceSeq(SDNode *Node,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue AccOp = Node->getOperand(0);
  SDValue VecOp = Node->getOperand(1);
  SDNodeFlags Flags = Node->getFlags();

  EVT VT = VecOp.getValueType();
  EVT EltVT = VT.getVectorElementType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);

  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
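  // A sequential reduction must preserve evaluation order, so fold the
  // accumulator through the lanes one at a time: ((Acc op e0) op e1) op ...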
  SDValue Res = AccOp;
  for (unsigned i = 0; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);

  return Res;
}

bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
                               SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  bool isSigned = Node->getOpcode() == ISD::SREM;
  unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
  SDValue Dividend = Node->getOperand(0);
  SDValue Divisor = Node->getOperand(1);
  if (isOperationLegalOrCustom(DivRemOpc, VT)) {
    SDVTList VTs = DAG.getVTList(VT, VT);
    Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
    return true;
  }
  if (isOperationLegalOrCustom(DivOpc, VT)) {
    // X % Y -> X-X/Y*Y
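    // e.g. 7 % 3 = 7 - (7 / 3) * 3 = 7 - 6 = 1.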
    SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
    Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
    return true;
  }
  return false;
}

SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate.
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width no wider than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }
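  // e.g. for a signed saturation width of 8, [MinInt, MaxInt] = [-128, 127],
  // extended to the destination width.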
  // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);
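  // e.g. the i64 bound INT64_MAX is not exactly representable as an f32, so
  // an f32 -> i64 saturation cannot take the min+max path below and uses the
  // compare-and-select sequence instead.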
  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
                           ISD::CondCode::SETUO);
  }
  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
                           ISD::CondCode::SETULT);
  // If Src OGT MaxFloat, select MaxInt.
  Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
                           ISD::CondCode::SETOGT);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}

SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory as follows:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr
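  // Illustration: splice(V1, V2, -2) yields the last two elements of V1
  // followed by the leading elements of V2, so the load starts two element
  // sizes before the end of V1's stack slot.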
  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes =
        DAG.getVScale(DL, PtrVT,
                      APInt(PtrVT.getFixedSizeInBits(),
                            VT.getStoreSize().getKnownMinValue()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}

bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
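    // e.g. LHS setgt RHS was already retried above as RHS setlt LHS; here it
    // becomes !(LHS setle RHS), or !(RHS setge LHS) if a swap is also needed.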
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUO is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
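      // e.g. x one y == (x ogt y) | (x olt y), and x ueq y is the inverse of
      // that disjunction.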
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC2, OpVT) &&
          (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
           TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
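        // e.g. x ugt y becomes (x gt y) | (x uo y), while x ogt y becomes
        // (x gt y) & (x o y).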
        break;
      }
      // Fall through if we are an unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unordered operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}