//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>

using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

bool TargetLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions, const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(NewOp.getValueType(), CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;
    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;
  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
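
// Example usage (an illustrative sketch, not code from this file): a target
// commonly expands an unsupported floating-point node into one of these
// libcalls. Assuming an f32 FREM is being expanded, the RTLIB::REM_F32 entry
// (typically fmodf) could be reached as:
//
//   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
//   TargetLowering::MakeLibCallOptions CallOptions;
//   std::pair<SDValue, SDValue> Res =
//       TLI.makeLibCall(DAG, RTLIB::REM_F32, MVT::f32, {LHS, RHS},
//                       CallOptions, SDLoc(Op));
//   // Res.first is the call result, Res.second the output chain.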
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater than
    // or equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
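
// Worked example (illustrative; assumes a target where i64 is legal and
// misaligned accesses are fast): for a 15-byte memcpy the loop above first
// records an i64 covering bytes 0-7. The 7 remaining bytes are smaller than
// VTSize (8), so if Op.allowOverlap() holds, VTSize is clamped to Size and a
// second, overlapping i64 covering bytes 7-14 is recorded, giving
// MemOps = {i64, i64}. Without overlap the type steps down instead, giving
// MemOps = {i64, i32, i16, i8}.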
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS, SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.
  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons.
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default:
      llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = {OldLHS.getValueType(), OldRHS.getValueType()};
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
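
// Example (illustrative, in terms of the usual libgcc/compiler-rt routines):
// softening `setcc f32 a, b, setueq` selects LC1 = UO_F32 and LC2 = OEQ_F32,
// so the comparison becomes roughly
//   (__unordsf2(a, b) != 0) | (__eqsf2(a, b) == 0)
// i.e. "unordered or equal". For SETONE, which takes the inverted path, the
// two setcc results are inverted and combined with AND: ordered and unequal.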
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-PIC modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();
  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jump table, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // the GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
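
// Example (illustrative): for (or x, 0xFF00FF00) where only the low 16 bits
// are demanded, the constant is not a subset of DemandedBits, so the node is
// rewritten as (or x, 0x0000FF00); the cleared constant bits were never
// observable. An XOR whose constant covers all demanded bits is deliberately
// left alone, since (xor x, -1) is the canonical form of 'not'.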
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (VT.isScalableVector()) {
    // Pretend we don't know anything for now.
    Known = KnownBits(DemandedBits.getBitWidth());
    return false;
  }

  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}
// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
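/// Attempt to find a simpler, already-available value (or one that is cheap
/// to rebuild, e.g. via a bitcast) that produces the same DemandedBits and
/// DemandedElts as Op. Unlike SimplifyDemandedBits this never rewrites Op in
/// place, so it is safe to call on nodes with multiple uses; it returns an
/// empty SDValue when no simpler value is found.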
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(Op.getValueType());

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
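
    // If the bitcast widens the elements (e.g. v2i32 -> i64 on a
    // little-endian target), map each demanded dst bit/elt back onto the
    // narrower src elements it covers before recursing.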
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
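    // E.g. if DemandedBits = 0x0F and the RHS is known to be all ones in the
    // low nibble, the 'and' passes every demanded LHS bit through unchanged,
    // so the LHS can be used directly.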
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
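    // (If the source has NumSignBits sign bits, the shifted value still has
    // NumSignBits - ShAmt of them; when those cover every position at or
    // above the lowest demanded bit, each demanded bit of both the shifted
    // and unshifted value is a copy of the same sign bit, so the shift can
    // be bypassed.)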
    if (const APInt *MaxSA =
            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0
    // or -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits)
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    // If this simply widens the lowest subvector, see if we can do it earlier.
    if (Idx == 0 && Vec.isUndef()) {
      if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
              Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
        return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                           Op.getOperand(0), NewSub, Op.getOperand(2));
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts come from one operand and form an identity
    // mapping, then we can use that operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}
/// Look at Op. At this point, we know that only the OriginalDemandedBits of
/// the result of Op are ever used downstream. If we can use this information
/// to simplify Op, create a new simplified DAG node and return true, returning
/// the original and new nodes in Old and New. Otherwise, analyze the
/// expression and return a mask of Known bits for the expression (used to
/// simplify the caller). The Known bits may only be accurate for those bits
/// in the OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (Op.getValueType().isScalableVector())
    return false;

  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!Op.getValueType().isVector() ||
          NumElts == Op.getValueType().getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Undef operand.
  if (Op.isUndef())
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  EVT VT = Op.getValueType();
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
    if (Depth != 0) {
      // If not at the root, just compute the Known bits to
      // simplify things downstream.
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::TargetConstant:
    llvm_unreachable("Can't simplify this node");
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the known bits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = KnownBits::commonBits(Known, KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = KnownBits::commonBits(Known, KnownSub);
    if (!!DemandedSrcElts)
      Known = KnownBits::commonBits(Known, KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = KnownBits::commonBits(Known, Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      if (!DemandedElts[i])
        continue;
      int M = ShuffleMask[i];
      if (M < 0) {
        // For UNDEF elements, we don't know anything about the common state of
        // the shuffle result.
        DemandedLHS.clearAllBits();
        DemandedRHS.clearAllBits();
        break;
      }
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);

      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set
      // by the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    Known &= Known2;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    Known |= Known2;
    break;
  }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
    if (C) {
      // If one side is a constant, and all of the set bits in the constant are
      // also known set on the other side, turn this into an AND, as we know
      // the bits will be cleared.
      // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
        // We're flipping all demanded bits. Flip the undemanded bits too.
        SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
        return TLO.CombineTo(Op, New);
      }
    }

    // If we can't turn this into a 'not', try to shrink the constant.
    if (!C || !C->isAllOnes())
      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
        return true;

    Known ^= Known2;
    break;
  }
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0
    // or -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case ISD::SHL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift. We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      // TODO - support non-uniform vector amounts.
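      // E.g. for i32, (shl (srl X, 8), 24) with only the top 8 result bits
      // demanded folds to (shl X, 16).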
      if (Op0.getOpcode() == ISD::SRL) {
        if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SHL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SRL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
              TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                              TLO.DAG.getConstant(ShAmt, dl, ShTy));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }

        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        // TODO - support non-uniform vector amounts.
        if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
            InnerOp.hasOneUse()) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
            unsigned InnerShAmt = SA2->getZExtValue();
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countTrailingZeros() >= ShAmt) {
              SDValue NewSA =
                  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
            }
          }
        }
      }

      APInt InDemandedMask = DemandedBits.lshr(ShAmt);
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);

      // Try shrinking the operation as long as the shift amount will still be
      // in range.
      if ((ShAmt < DemandedBits.getActiveBits()) &&
          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
        return true;
    }

    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return TLO.CombineTo(Op, Op0);
    }
    break;
  }
  case ISD::SRL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift. We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
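      // E.g. for i32, (srl (shl X, 8), 24) with only the low 8 result bits
      // demanded folds to (srl X, 16).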
      if (Op0.getOpcode() == ISD::SHL) {
        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SRL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SHL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);
      // High bits known zero.
      Known.Zero.setHighBits(ShAmt);
    }
    break;
  }
  case ISD::SRA: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
    if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
        NumHiDemandedBits)
      return TLO.CombineTo(Op, Op0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedBits.isOne())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we
      // also demand the input sign bit.
      if (DemandedBits.countLeadingZeros() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countLeadingZeros() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
      }

      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
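        // (We already know DemandedBits.countLeadingZeros() < ShAmt here, so
        // the single demanded bit reads past the MSB of the source and is a
        // copy of the sign bit; shift the sign bit down to that position.)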
        SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDValue Op2 = Op.getOperand(2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
                                 Known, TLO, Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;

      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.One |= Known2.One;
      Known.Zero |= Known2.Zero;
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
                               Known2, TLO, Depth + 1))
        return true;
    }
    break;
  }
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    bool IsROTL = (Op.getOpcode() == ISD::ROTL);

    // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
    if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, Op0);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);
      unsigned RevAmt = BitWidth - Amt;

      // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
      // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
      APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;

      // rot*(x, 0) --> x
      if (Amt == 0)
        return TLO.CombineTo(Op, Op0);

      // See if we don't demand either half of the rotated bits.
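      // E.g. for i32 (rotl X, 8) where the low 8 result bits (the
      // wrapped-around top bits of X) are not demanded, a plain (shl X, 8)
      // computes all the demanded bits.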
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
          DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
      }
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
          DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
      }
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case ISD::UMIN: {
    // Check if one arg is always less than (or equal) to the other arg.
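    // (KnownBits::ule/ult compare the known unsigned ranges, e.g. if Op0's
    // maximum possible value is <= Op1's minimum possible value, the umin is
    // always Op0.)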
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umin(Known0, Known1);
    if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
      return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
    if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
      return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
    break;
  }
  case ISD::UMAX: {
    // Check if one arg is always greater than (or equal) to the other arg.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umax(Known0, Known1);
    if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
      return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
    if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
      return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.reverseBits();
    Known.Zero = Known2.Zero.reverseBits();
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = Op.getOperand(0);

    // If the only bits demanded come from one byte of the bswap result,
    // just shift the input byte into position to eliminate the bswap.
    unsigned NLZ = DemandedBits.countLeadingZeros();
    unsigned NTZ = DemandedBits.countTrailingZeros();

    // Round NTZ down to the next byte. If we have 11 trailing zeros, then
    // we need all the bits down to bit 8. Likewise, round NLZ. If we
    // have 14 leading zeros, round to 8.
    NLZ = alignDown(NLZ, 8);
    NTZ = alignDown(NTZ, 8);
    // If we need exactly one byte, we can do this transformation.
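    // E.g. for an i32 bswap where only bits 15:8 of the result are demanded:
    // NLZ = 16 and NTZ = 8, the demanded byte comes from bits 23:16 of the
    // source, and a single (srl Src, 8) moves it into place.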
    if (BitWidth - NLZ - NTZ == 8) {
      // Replace this with either a left or right shift to get the byte into
      // the right place.
      unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
      if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
        EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
        unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
        SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
        SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    APInt DemandedSrcBits = DemandedBits.byteSwap();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.byteSwap();
    Known.Zero = Known2.Zero.byteSwap();
    break;
  }
  case ISD::CTPOP: {
    // If only 1 bit is demanded, replace with PARITY as long as we're before
    // op legalization.
    // FIXME: Limit to scalars for now.
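    // (Bit 0 of a population count is exactly the parity of the input bits,
    // so demanding only the low bit lets us use the cheaper PARITY node.)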
    if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
                                               Op.getOperand(0)));

    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
    if (DemandedBits.isSignMask()) {
      unsigned MinSignedBits =
          TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
      bool AlreadySignExtended = ExVTBits >= MinSignedBits;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
                                               getShiftAmountTy(VT, DL));
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(Op, Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));

    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
  case ISD::BUILD_PAIR: {
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    Known.Zero = KnownLo.Zero.zext(BitWidth) |
                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);

    Known.One = KnownLo.One.zext(BitWidth) |
                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
    break;
  }
  1753. case ISD::ZERO_EXTEND:
  1754. case ISD::ZERO_EXTEND_VECTOR_INREG: {
  1755. SDValue Src = Op.getOperand(0);
  1756. EVT SrcVT = Src.getValueType();
  1757. unsigned InBits = SrcVT.getScalarSizeInBits();
  1758. unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  1759. bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
  1760. // If none of the top bits are demanded, convert this into an any_extend.
  1761. if (DemandedBits.getActiveBits() <= InBits) {
  1762. // If we only need the non-extended bits of the bottom element
  1763. // then we can just bitcast to the result.
  1764. if (IsLE && IsVecInReg && DemandedElts == 1 &&
  1765. VT.getSizeInBits() == SrcVT.getSizeInBits())
  1766. return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
  1767. unsigned Opc =
  1768. IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
  1769. if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
  1770. return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
  1771. }
  1772. APInt InDemandedBits = DemandedBits.trunc(InBits);
  1773. APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
  1774. if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
  1775. Depth + 1))
  1776. return true;
  1777. assert(!Known.hasConflict() && "Bits known to be one AND zero?");
  1778. assert(Known.getBitWidth() == InBits && "Src width has changed?");
  1779. Known = Known.zext(BitWidth);
  1780. // Attempt to avoid multi-use ops if we don't need anything from them.
  1781. if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
  1782. Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
  1783. return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
  1784. break;
  1785. }
  1786. case ISD::SIGN_EXTEND:
  1787. case ISD::SIGN_EXTEND_VECTOR_INREG: {
  1788. SDValue Src = Op.getOperand(0);
  1789. EVT SrcVT = Src.getValueType();
  1790. unsigned InBits = SrcVT.getScalarSizeInBits();
  1791. unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  1792. bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
  1793. // If none of the top bits are demanded, convert this into an any_extend.
  1794. if (DemandedBits.getActiveBits() <= InBits) {
  1795. // If we only need the non-extended bits of the bottom element
  1796. // then we can just bitcast to the result.
  1797. if (IsLE && IsVecInReg && DemandedElts == 1 &&
  1798. VT.getSizeInBits() == SrcVT.getSizeInBits())
  1799. return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
  1800. unsigned Opc =
  1801. IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
  1802. if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
  1803. return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
  1804. }
  1805. APInt InDemandedBits = DemandedBits.trunc(InBits);
  1806. APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
  1807. // Since some of the sign extended bits are demanded, we know that the sign
  1808. // bit is demanded.
  1809. InDemandedBits.setBit(InBits - 1);
  1810. if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
  1811. Depth + 1))
  1812. return true;
  1813. assert(!Known.hasConflict() && "Bits known to be one AND zero?");
  1814. assert(Known.getBitWidth() == InBits && "Src width has changed?");
  1815. // If the sign bit is known one, the top bits match.
  1816. Known = Known.sext(BitWidth);
  1817. // If the sign bit is known zero, convert this to a zero extend.
  1818. if (Known.isNonNegative()) {
  1819. unsigned Opc =
  1820. IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
  1821. if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
  1822. return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
  1823. }
  1824. // Attempt to avoid multi-use ops if we don't need anything from them.
  1825. if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
  1826. Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
  1827. return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
  1828. break;
  1829. }
  1830. case ISD::ANY_EXTEND:
  1831. case ISD::ANY_EXTEND_VECTOR_INREG: {
  1832. SDValue Src = Op.getOperand(0);
  1833. EVT SrcVT = Src.getValueType();
  1834. unsigned InBits = SrcVT.getScalarSizeInBits();
  1835. unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  1836. bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
  1837. // If we only need the bottom element then we can just bitcast.
  1838. // TODO: Handle ANY_EXTEND?
  1839. if (IsLE && IsVecInReg && DemandedElts == 1 &&
  1840. VT.getSizeInBits() == SrcVT.getSizeInBits())
  1841. return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
  1842. APInt InDemandedBits = DemandedBits.trunc(InBits);
  1843. APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
  1844. if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
  1845. Depth + 1))
  1846. return true;
  1847. assert(!Known.hasConflict() && "Bits known to be one AND zero?");
  1848. assert(Known.getBitWidth() == InBits && "Src width has changed?");
  1849. Known = Known.anyext(BitWidth);
  1850. // Attempt to avoid multi-use ops if we don't need anything from them.
  1851. if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
  1852. Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
  1853. return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
  1854. break;
  1855. }
  1856. case ISD::TRUNCATE: {
  1857. SDValue Src = Op.getOperand(0);
  1858. // Simplify the input, using demanded bit information, and compute the known
  1859. // zero/one bits live out.
  1860. unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
  1861. APInt TruncMask = DemandedBits.zext(OperandBitWidth);
  1862. if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
  1863. Depth + 1))
  1864. return true;
  1865. Known = Known.trunc(BitWidth);
  1866. // Attempt to avoid multi-use ops if we don't need anything from them.
  1867. if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
  1868. Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
  1869. return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
  1870. // If the input is only used by this truncate, see if we can shrink it based
  1871. // on the known demanded bits.
  1872. if (Src.getNode()->hasOneUse()) {
  1873. switch (Src.getOpcode()) {
  1874. default:
  1875. break;
  1876. case ISD::SRL:
  1877. // Shrink SRL by a constant if none of the high bits shifted in are
  1878. // demanded.
  1879. if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
  1880. // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
  1881. // undesirable.
  1882. break;
  1883. const APInt *ShAmtC =
  1884. TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
  1885. if (!ShAmtC || ShAmtC->uge(BitWidth))
  1886. break;
  1887. uint64_t ShVal = ShAmtC->getZExtValue();
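
        // HighBits are the positions in the truncated result that receive
        // bits originating above the truncation width; if none of them are
        // demanded, the truncate can be hoisted above the shift.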
        APInt HighBits =
            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
        HighBits.lshrInPlace(ShVal);
        HighBits = HighBits.trunc(BitWidth);

        if (!(HighBits & DemandedBits)) {
          // None of the shifted in bits are needed. Add a truncate of the
          // shift input, then shift it.
          SDValue NewShAmt = TLO.DAG.getConstant(
              ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
          SDValue NewTrunc =
              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
        }
        break;
      }
    }

    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case ISD::AssertZext: {
    // AssertZext demands all of the high bits, plus any of the low bits
    // demanded by its users.
    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
                             TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    Known.Zero |= ~InMask;
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Src = Op.getOperand(0);
    SDValue Idx = Op.getOperand(1);
    ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
    unsigned EltBitWidth = Src.getScalarValueSizeInBits();
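
    // Bail out on scalable vectors - we cannot track demandedness over an
    // element count that is unknown at compile time.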
    if (SrcEltCnt.isScalable())
      return false;

    // Demand the bits from every vector element without a constant index.
    unsigned NumSrcElts = SrcEltCnt.getFixedValue();
    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
      if (CIdx->getAPIntValue().ult(NumSrcElts))
        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());

    // If BitWidth > EltBitWidth the value is any-extended, so we do not know
    // anything about the extended bits.
    APInt DemandedSrcBits = DemandedBits;
    if (BitWidth > EltBitWidth)
      DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);

    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known = Known2;
    if (BitWidth > EltBitWidth)
      Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place. We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
      }
    }

    // Bitcast from a vector using SimplifyDemandedBits/VectorElts.
    // Demand the elt/bit if any of the original elts/bits are demanded.
    if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
      unsigned Scale = BitWidth / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
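        // Map sub-element i onto the source element order; on big-endian
        // targets the first source element holds the most significant bits
        // of the wide element.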
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      APInt KnownSrcUndef, KnownSrcZero;
      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                     KnownSrcZero, TLO, Depth + 1))
        return true;

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
      // TODO - bigendian once we have test coverage.
      unsigned Scale = NumSrcEltBits / BitWidth;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * BitWidth;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SrcVT.isVector()) {
        APInt KnownSrcUndef, KnownSrcZero;
        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                       KnownSrcZero, TLO, Depth + 1))
          return true;
      }

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    }

    // If this is a bitcast, let computeKnownBits handle it. Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    break;
  }
  case ISD::MUL:
    // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1]
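    // (bit 1 of x*x is the cross term x[0]*x[1] + x[1]*x[0], which is even,
    // so the square of any integer has bit 1 clear; DemandedBits == 2 means
    // bit 1 is the only bit demanded).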
    if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    LLVM_FALLTHROUGH;
  case ISD::ADD:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
    SDNodeFlags Flags = Op.getNode()->getFlags();
    unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        // See if the operation should be performed at a smaller bit width.
        ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
      if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
        // Disable the nsw and nuw flags. We can no longer guarantee that we
        // won't wrap after simplification.
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
        return TLO.CombineTo(Op, NewOp);
      }
      return true;
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If we have a constant operand, we may be able to turn it into -1 if we
    // do not demand the high bits. This can make the constant smaller to
    // encode, allow more general folding, or match specialized instruction
    // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because
    // that is probably not useful (and could be detrimental).
    ConstantSDNode *C = isConstOrConstSplat(Op1);
    APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
    if (C && !C->isAllOnes() && !C->isOne() &&
        (C->getAPIntValue() | HighMask).isAllOnes()) {
      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
      // Disable the nsw and nuw flags. We can no longer guarantee that we
      // won't wrap after simplification.
      Flags.setNoSignedWrap(false);
      Flags.setNoUnsignedWrap(false);
      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
      return TLO.CombineTo(Op, NewOp);
    }

    LLVM_FALLTHROUGH;
  }
  default:
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
      if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
                                            Known, TLO, Depth))
        return true;
      break;
    }

    // Just use computeKnownBits to compute output bits.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
    // Avoid folding to a constant if any OpaqueConstant is involved.
    const SDNode *N = Op.getNode();
    for (SDNode *Op :
         llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (C->isOpaque())
          return false;
    }
    if (VT.isInteger())
      return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
    if (VT.isFloatingPoint())
      return TLO.CombineTo(
          Op,
          TLO.DAG.getConstantFP(
              APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
  }

  return false;
}

bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
                                                const APInt &DemandedElts,
                                                APInt &KnownUndef,
                                                APInt &KnownZero,
                                                DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());

  bool Simplified =
      SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }

  return Simplified;
}

/// Given a vector binary operation and known undefined elements for each input
/// operand, compute whether each element of the output is undefined.
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
                                         const APInt &UndefOp0,
                                         const APInt &UndefOp1) {
  EVT VT = BO.getValueType();
  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
         "Vector binop only");

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();
  assert(UndefOp0.getBitWidth() == NumElts &&
         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");

  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
                                   const APInt &UndefVals) {
    if (UndefVals[Index])
      return DAG.getUNDEF(EltVT);

    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
      // Try hard to make sure that the getNode() call is not creating temporary
      // nodes. Ignore opaque integers because they do not constant fold.
      SDValue Elt = BV->getOperand(Index);
      auto *C = dyn_cast<ConstantSDNode>(Elt);
      if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
        return Elt;
    }

    return SDValue();
  };

  APInt KnownUndef = APInt::getZero(NumElts);
  for (unsigned i = 0; i != NumElts; ++i) {
    // If both inputs for this element are either constant or undef and match
    // the element type, compute the constant/undef result for this element of
    // the vector.
    // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
    // not handle FP constants. The code within getNode() should be refactored
    // to avoid the danger of creating a bogus temporary node here.
    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
    if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
        KnownUndef.setBit(i);
  }
  return KnownUndef;
}

bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  APInt DemandedElts = OriginalDemandedElts;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");

  KnownUndef = KnownZero = APInt::getZero(NumElts);

  // TODO: For now we assume we know nothing about scalable vectors.
  if (VT.isScalableVector())
    return false;

  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();

  // Helper for demanding the specified elements and all the bits of both binary
  // operands.
  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
    SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    if (NewOp0 || NewOp1) {
      SDValue NewOp = TLO.DAG.getNode(
          Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
      return TLO.CombineTo(Op, NewOp);
    }
    return false;
  };

  switch (Opcode) {
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    }
    SDValue ScalarSrc = Op.getOperand(0);
    if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      SDValue Src = ScalarSrc.getOperand(0);
      SDValue Idx = ScalarSrc.getOperand(1);
      EVT SrcVT = Src.getValueType();

      ElementCount SrcEltCnt = SrcVT.getVectorElementCount();

      if (SrcEltCnt.isScalable())
        return false;

      unsigned NumSrcElts = SrcEltCnt.getFixedValue();
      if (isNullConstant(Idx)) {
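        // The inserted scalar is extract(Src, 0), so only element 0 of Src
        // is really demanded; recursing may simplify the other lanes of Src.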
        APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
        APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
        APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
        if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                       TLO, Depth + 1))
          return true;
      }
    }
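
    // Every lane of a SCALAR_TO_VECTOR other than lane 0 is undefined.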
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // We only handle vectors here.
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
    if (!SrcVT.isVector())
      break;

    // Fast handling of 'identity' bitcasts.
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    if (NumSrcElts == NumElts)
      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
                                        KnownZero, TLO, Depth + 1);

    APInt SrcDemandedElts, SrcZero, SrcUndef;

    // Bitcast from 'large element' src vector to 'small element' vector, we
    // must demand a source element if any DemandedElt maps to it.
    if ((NumElts % NumSrcElts) == 0) {
      unsigned Scale = NumElts / NumSrcElts;
      SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
      // of the large element.
      // TODO - bigendian once we have test coverage.
      if (IsLE) {
        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
        APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
        for (unsigned i = 0; i != NumElts; ++i)
          if (DemandedElts[i]) {
            unsigned Ofs = (i % Scale) * EltSizeInBits;
            SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
          }

        KnownBits Known;
        if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
                                 TLO, Depth + 1))
          return true;
      }

      // If a src element is zero/undef then all the output elements it covers
      // will be as well - only demanded elements are guaranteed to be correct.
      for (unsigned i = 0; i != NumSrcElts; ++i) {
        if (SrcDemandedElts[i]) {
          if (SrcZero[i])
            KnownZero.setBits(i * Scale, (i + 1) * Scale);
          if (SrcUndef[i])
            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
        }
      }
    }

    // Bitcast from 'small element' src vector to 'large element' vector, we
    // demand all smaller source elements covered by the larger demanded element
    // of this vector.
    if ((NumSrcElts % NumElts) == 0) {
      unsigned Scale = NumSrcElts / NumElts;
      SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If all the src elements covering an output element are zero/undef, then
      // the output element will be as well, assuming it was demanded.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i]) {
          if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
            KnownZero.setBit(i);
          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
            KnownUndef.setBit(i);
        }
      }
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnes()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Op->op_values(),
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
      KnownZero.insertBits(SubZero, i * NumSubElts);
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
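    // The subvector overwrites lanes [Idx, Idx + NumSubElts), so those lanes
    // are not demanded from the source vector.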
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
                                   Depth + 1))
      return true;

    // If none of the src operand elements are demanded, replace it with undef.
    if (!DemandedSrcElts && !Src.isUndef())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                                               TLO.DAG.getUNDEF(VT), Sub,
                                               Op.getOperand(2)));

    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;
    KnownUndef.insertBits(SubUndef, Idx);
    KnownZero.insertBits(SubZero, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
          Sub, DemandedSubElts, TLO.DAG, Depth + 1);
      if (NewSrc || NewSub) {
        NewSrc = NewSrc ? NewSrc : Src;
        NewSub = NewSub ? NewSub : Sub;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        NewSub, Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownUndef = SrcUndef.extractBits(NumElts, Idx);
    KnownZero = SrcZero.extractBits(NumElts, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnes()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));

    // For a legal, constant insertion index, if we don't need this insertion
    // then strip it, else remove it from the demanded elts.
    if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
      unsigned Idx = CIdx->getZExtValue();
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);

      APInt DemandedVecElts(DemandedElts);
      DemandedVecElts.clearBit(Idx);
      if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
                                     KnownZero, TLO, Depth + 1))
        return true;

      KnownUndef.setBitVal(Idx, Scl.isUndef());

      KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
      break;
    }

    APInt VecUndef, VecZero;
    if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
                                   Depth + 1))
      return true;
    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
    break;
  }
  case ISD::VSELECT: {
    // Try to transform the select condition based on the current demanded
    // elements.
    // TODO: If a condition element is undef, we can choose from one arm of the
    // select (and if one arm is undef, then we can propagate that to the
    // result).
    // TODO - add support for constant vselect masks (see IR version of this).
    APInt UnusedUndef, UnusedZero;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
                                   UnusedZero, TLO, Depth + 1))
      return true;

    // See if we can simplify either vselect operand.
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    KnownUndef = UndefLHS & UndefRHS;
    KnownZero = ZeroLHS & ZeroRHS;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    // See if we can simplify either shuffle operand.
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    // Simplify mask using undef elements from LHS/RHS.
    bool Updated = false;
    bool IdentityLHS = true, IdentityRHS = true;
    SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
    for (unsigned i = 0; i != NumElts; ++i) {
      int &M = NewMask[i];
      if (M < 0)
        continue;
      if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
          (M >= (int)NumElts && UndefRHS[M - NumElts])) {
        Updated = true;
        M = -1;
      }
      IdentityLHS &= (M < 0) || (M == (int)i);
      IdentityRHS &= (M < 0) || ((M - NumElts) == i);
    }

    // Update legal shuffle masks based on demanded elements if it won't reduce
    // to Identity which can cause premature removal of the shuffle mask.
    if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
      SDValue LegalShuffle =
          buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
                                  NewMask, TLO.DAG);
      if (LegalShuffle)
        return TLO.CombineTo(Op, LegalShuffle);
    }

    // Propagate undef/zero elements from LHS/RHS.
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0) {
        KnownUndef.setBit(i);
      } else if (M < (int)NumElts) {
        if (UndefLHS[M])
          KnownUndef.setBit(i);
        if (ZeroLHS[M])
          KnownZero.setBit(i);
      } else {
        if (UndefRHS[M - NumElts])
          KnownUndef.setBit(i);
        if (ZeroRHS[M - NumElts])
          KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    APInt SrcUndef, SrcZero;
    SDValue Src = Op.getOperand(0);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownZero = SrcZero.zextOrTrunc(NumElts);
    KnownUndef = SrcUndef.zextOrTrunc(NumElts);

    if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
        Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
        DemandedSrcElts == 1) {
      // aext - if we just need the bottom element then we can bitcast.
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
    }

    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();

      // zext - if we just need the bottom element then we can mask:
      // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
      if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
          Op->isOnlyUserOf(Src.getNode()) &&
          Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
        SDLoc DL(Op);
        EVT SrcVT = Src.getValueType();
        EVT SrcSVT = SrcVT.getScalarType();
        SmallVector<SDValue> MaskElts;
        MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
        MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
        SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
        if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
                ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
          Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
          return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
        }
      }
    }
    break;
  }

  // TODO: There are more binop opcodes that could be handled here - MIN,
  // MAX, saturated math, etc.
  case ISD::ADD: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
      APInt UndefLHS, ZeroLHS;
      if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                     Depth + 1, /*AssumeSingleUse*/ true))
        return true;
    }
    LLVM_FALLTHROUGH;
  }
  case ISD::OR:
  case ISD::XOR:
  case ISD::SUB:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    KnownZero = ZeroLHS & ZeroRHS;
    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnes())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    KnownZero = ZeroLHS;
    KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnes())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::MUL:
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;

    // If either side has a zero element, then the result element is zero, even
    // if the other is an UNDEF.
    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
    // and then handle 'and' nodes with the rest of the binop opcodes.
    KnownZero |= SrcZero;
    KnownUndef &= SrcUndef;
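    // Keep the undef and zero sets disjoint - a lane already known zero must
    // not also be reported as undef.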
  2696. KnownUndef &= ~KnownZero;
  2697. // Attempt to avoid multi-use ops if we don't need anything from them.
  2698. // TODO - use KnownUndef to relax the demandedelts?
  2699. if (!DemandedElts.isAllOnes())
  2700. if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
  2701. return true;
  2702. break;
  2703. }
  2704. case ISD::TRUNCATE:
  2705. case ISD::SIGN_EXTEND:
  2706. case ISD::ZERO_EXTEND:
  2707. if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
  2708. KnownZero, TLO, Depth + 1))
  2709. return true;
  2710. if (Op.getOpcode() == ISD::ZERO_EXTEND) {
  2711. // zext(undef) upper bits are guaranteed to be zero.
  2712. if (DemandedElts.isSubsetOf(KnownUndef))
  2713. return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
  2714. KnownUndef.clearAllBits();
  2715. }
  2716. break;
  2717. default: {
  2718. if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
  2719. if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
  2720. KnownZero, TLO, Depth))
  2721. return true;
  2722. } else {
  2723. KnownBits Known;
  2724. APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
  2725. if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
  2726. TLO, Depth, AssumeSingleUse))
  2727. return true;
  2728. }
  2729. break;
  2730. }
  2731. }
  2732. assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
  2733. // Constant fold all undef cases.
  2734. // TODO: Handle zero cases as well.
  2735. if (DemandedElts.isSubsetOf(KnownUndef))
  2736. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  2737. return false;
  2738. }
  2739. /// Determine which of the bits specified in Mask are known to be either zero or
  2740. /// one and return them in the Known.
  2741. void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
  2742. KnownBits &Known,
  2743. const APInt &DemandedElts,
  2744. const SelectionDAG &DAG,
  2745. unsigned Depth) const {
  2746. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2747. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2748. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2749. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2750. "Should use MaskedValueIsZero if you don't know whether Op"
  2751. " is a target node!");
  2752. Known.resetAll();
  2753. }
  2754. void TargetLowering::computeKnownBitsForTargetInstr(
  2755. GISelKnownBits &Analysis, Register R, KnownBits &Known,
  2756. const APInt &DemandedElts, const MachineRegisterInfo &MRI,
  2757. unsigned Depth) const {
  2758. Known.resetAll();
  2759. }
  2760. void TargetLowering::computeKnownBitsForFrameIndex(
  2761. const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
  2762. // The low bits are known zero if the pointer is aligned.
  2763. Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
  2764. }
  2765. Align TargetLowering::computeKnownAlignForTargetInstr(
  2766. GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
  2767. unsigned Depth) const {
  2768. return Align(1);
  2769. }
  2770. /// This method can be implemented by targets that want to expose additional
  2771. /// information about sign bits to the DAG Combiner.
  2772. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
  2773. const APInt &,
  2774. const SelectionDAG &,
  2775. unsigned Depth) const {
  2776. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2777. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2778. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2779. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2780. "Should use ComputeNumSignBits if you don't know whether Op"
  2781. " is a target node!");
  2782. return 1;
  2783. }
  2784. unsigned TargetLowering::computeNumSignBitsForTargetInstr(
  2785. GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
  2786. const MachineRegisterInfo &MRI, unsigned Depth) const {
  2787. return 1;
  2788. }
  2789. bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
  2790. SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
  2791. TargetLoweringOpt &TLO, unsigned Depth) const {
  2792. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2793. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2794. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2795. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2796. "Should use SimplifyDemandedVectorElts if you don't know whether Op"
  2797. " is a target node!");
  2798. return false;
  2799. }
  2800. bool TargetLowering::SimplifyDemandedBitsForTargetNode(
  2801. SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
  2802. KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
  2803. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2804. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2805. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2806. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2807. "Should use SimplifyDemandedBits if you don't know whether Op"
  2808. " is a target node!");
  2809. computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
  2810. return false;
  2811. }
  2812. SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
  2813. SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
  2814. SelectionDAG &DAG, unsigned Depth) const {
  2815. assert(
  2816. (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2817. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2818. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2819. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2820. "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
  2821. " is a target node!");
  2822. return SDValue();
  2823. }
  2824. SDValue
  2825. TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
  2826. SDValue N1, MutableArrayRef<int> Mask,
  2827. SelectionDAG &DAG) const {
  2828. bool LegalMask = isShuffleMaskLegal(Mask, VT);
  2829. if (!LegalMask) {
  2830. std::swap(N0, N1);
  2831. ShuffleVectorSDNode::commuteMask(Mask);
  2832. LegalMask = isShuffleMaskLegal(Mask, VT);
  2833. }
  2834. if (!LegalMask)
  2835. return SDValue();
  2836. return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
  2837. }
  2838. const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  2839. return nullptr;
  2840. }
  2841. bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
  2842. SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
  2843. bool PoisonOnly, unsigned Depth) const {
  2844. assert(
  2845. (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2846. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2847. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2848. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2849. "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
  2850. " is a target node!");
  2851. return false;
  2852. }
  2853. bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
  2854. const SelectionDAG &DAG,
  2855. bool SNaN,
  2856. unsigned Depth) const {
  2857. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2858. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2859. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2860. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2861. "Should use isKnownNeverNaN if you don't know whether Op"
  2862. " is a target node!");
  2863. return false;
  2864. }
  2865. bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
  2866. const APInt &DemandedElts,
  2867. APInt &UndefElts,
  2868. unsigned Depth) const {
  2869. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2870. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2871. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2872. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2873. "Should use isSplatValue if you don't know whether Op"
  2874. " is a target node!");
  2875. return false;
  2876. }
  2877. // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
  2878. // work with truncating build vectors and vectors with elements of less than
  2879. // 8 bits.
  2880. bool TargetLowering::isConstTrueVal(SDValue N) const {
  2881. if (!N)
  2882. return false;
  2883. unsigned EltWidth;
  2884. APInt CVal;
  2885. if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
  2886. /*AllowTruncation=*/true)) {
  2887. CVal = CN->getAPIntValue();
  2888. EltWidth = N.getValueType().getScalarSizeInBits();
  2889. } else
  2890. return false;
  2891. // If this is a truncating splat, truncate the splat value.
  2892. // Otherwise, we may fail to match the expected values below.
  2893. if (EltWidth < CVal.getBitWidth())
  2894. CVal = CVal.trunc(EltWidth);
  2895. switch (getBooleanContents(N.getValueType())) {
  2896. case UndefinedBooleanContent:
  2897. return CVal[0];
  2898. case ZeroOrOneBooleanContent:
  2899. return CVal.isOne();
  2900. case ZeroOrNegativeOneBooleanContent:
  2901. return CVal.isAllOnes();
  2902. }
  2903. llvm_unreachable("Invalid boolean contents");
  2904. }
  2905. bool TargetLowering::isConstFalseVal(SDValue N) const {
  2906. if (!N)
  2907. return false;
  2908. const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  2909. if (!CN) {
  2910. const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
  2911. if (!BV)
  2912. return false;
  2913. // Only interested in constant splats, we don't care about undef
  2914. // elements in identifying boolean constants and getConstantSplatNode
  2915. // returns NULL if all ops are undef;
  2916. CN = BV->getConstantSplatNode();
  2917. if (!CN)
  2918. return false;
  2919. }
  2920. if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
  2921. return !CN->getAPIntValue()[0];
  2922. return CN->isZero();
  2923. }
  2924. bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
  2925. bool SExt) const {
  2926. if (VT == MVT::i1)
  2927. return N->isOne();
  2928. TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
  2929. switch (Cnt) {
  2930. case TargetLowering::ZeroOrOneBooleanContent:
  2931. // An extended value of 1 is always true, unless its original type is i1,
  2932. // in which case it will be sign extended to -1.
  2933. return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
  2934. case TargetLowering::UndefinedBooleanContent:
  2935. case TargetLowering::ZeroOrNegativeOneBooleanContent:
  2936. return N->isAllOnes() && SExt;
  2937. }
  2938. llvm_unreachable("Unexpected enumeration.");
  2939. }
  2940. /// This helper function of SimplifySetCC tries to optimize the comparison when
  2941. /// either operand of the SetCC node is a bitwise-and instruction.
  2942. SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
  2943. ISD::CondCode Cond, const SDLoc &DL,
  2944. DAGCombinerInfo &DCI) const {
  2945. if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
  2946. std::swap(N0, N1);
  2947. SelectionDAG &DAG = DCI.DAG;
  2948. EVT OpVT = N0.getValueType();
  2949. if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
  2950. (Cond != ISD::SETEQ && Cond != ISD::SETNE))
  2951. return SDValue();
  2952. // (X & Y) != 0 --> zextOrTrunc(X & Y)
  2953. // iff everything but LSB is known zero:
  2954. if (Cond == ISD::SETNE && isNullConstant(N1) &&
  2955. (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
  2956. getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
  2957. unsigned NumEltBits = OpVT.getScalarSizeInBits();
  2958. APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
  2959. if (DAG.MaskedValueIsZero(N0, UpperBits))
  2960. return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
  2961. }
  2962. // Match these patterns in any of their permutations:
  2963. // (X & Y) == Y
  2964. // (X & Y) != Y
  2965. SDValue X, Y;
  2966. if (N0.getOperand(0) == N1) {
  2967. X = N0.getOperand(1);
  2968. Y = N0.getOperand(0);
  2969. } else if (N0.getOperand(1) == N1) {
  2970. X = N0.getOperand(0);
  2971. Y = N0.getOperand(1);
  2972. } else {
  2973. return SDValue();
  2974. }
  2975. SDValue Zero = DAG.getConstant(0, DL, OpVT);
  2976. if (DAG.isKnownToBeAPowerOfTwo(Y)) {
  2977. // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
  2978. // Note that where Y is variable and is known to have at most one bit set
  2979. // (for example, if it is Z & 1) we cannot do this; the expressions are not
  2980. // equivalent when Y == 0.
  2981. assert(OpVT.isInteger());
  2982. Cond = ISD::getSetCCInverse(Cond, OpVT);
  2983. if (DCI.isBeforeLegalizeOps() ||
  2984. isCondCodeLegal(Cond, N0.getSimpleValueType()))
  2985. return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  2986. } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
  2987. // If the target supports an 'and-not' or 'and-complement' logic operation,
  2988. // try to use that to make a comparison operation more efficient.
  2989. // But don't do this transform if the mask is a single bit because there are
  2990. // more efficient ways to deal with that case (for example, 'bt' on x86 or
  2991. // 'rlwinm' on PPC).
  2992. // Bail out if the compare operand that we want to turn into a zero is
  2993. // already a zero (otherwise, infinite loop).
  2994. auto *YConst = dyn_cast<ConstantSDNode>(Y);
  2995. if (YConst && YConst->isZero())
  2996. return SDValue();
  2997. // Transform this into: ~X & Y == 0.
  2998. SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
  2999. SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
  3000. return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  3001. }
  3002. return SDValue();
  3003. }

/// There are multiple IR patterns that could be checking whether a certain
/// truncation of a signed number would be lossy or not. The pattern that is
/// best at the IR level may not lower optimally, so we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x); that would have been constant-folded to
/// true/false. KeptBits also can't be 1; that would have been folded to
/// %x dstcond 0.
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
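/// For example, for i16 %x and KeptBits == 8 (so C == 8):
///   (add %x, 128) ult 256   <=>   ((%x << 8) a>> 8) eq %x
/// i.e. "%x is unchanged by sign-extending it back from 8 bits".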
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be powers of two, and the constant from the setcc
    // must be the bigger one.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };
  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
  return T2;
}

// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
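// e.g. (X & (0x8000 l>> Y)) != 0  -->  ((X << Y) & 0x8000) != 0, hoisting the
// shifted constant out of the variable shift.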
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) &&
         isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Look for '(C l>>/<< Y)'.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of the comparison should be a one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}

/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(0);
  SDValue Y = N0.getOperand(1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);

  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);

  // The shift would not be valid if the operands are boolean (i1).
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
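  // (exact in modular arithmetic: X - Y == Y  <=>  X == 2 * Y  <=>  X == Y << 1)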
  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
                                 !DCI.isBeforeLegalize());
  SDValue One = DAG.getConstant(1, DL, ShiftVT);
  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(YShl1.getNode());
  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}

static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() >
          Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // If this is a vector CTPOP, keep the CTPOP if it is legal.
  // TODO: Should we check if CTPOP is legal (or custom) for scalars?
  if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
    return SDValue();

  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
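  // More generally, clearing the lowest set bit k times tests (ctpop x) u< k+1,
  // e.g. (ctpop x) u< 3 -> ((x & (x - 1)) & ((x & (x - 1)) - 1)) == 0.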
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // If ctpop is not supported, expand a power-of-2 comparison based on it.
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // For scalars, keep CTPOP if it is legal or custom.
    if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
      return SDValue();
    // This is based on X86's custom lowering for CTPOP which produces more
    // instructions than the expansion here.

    // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
    // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
    SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
    SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
    SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
    unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
    return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
  }

  return SDValue();
}

/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                      ISD::CondCode Cond, bool foldBooleans,
                                      DAGCombinerInfo &DCI,
                                      const SDLoc &dl) const {
  SelectionDAG &DAG = DCI.DAG;
  const DataLayout &Layout = DAG.getDataLayout();
  EVT OpVT = N0.getValueType();

  // Constant fold or commute setcc.
  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
    return Fold;

  // Ensure that the constant occurs on the RHS and fold constant comparisons.
  // TODO: Handle non-splat vector constants. All undef causes trouble.
  // FIXME: We can't yet fold constant scalable vector splats, so avoid an
  // infinite loop here when we encounter one.
  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
  if (isConstOrConstSplat(N0) &&
      (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
      (DCI.isBeforeLegalizeOps() ||
       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);

  // If we have a subtract with the same 2 non-constant operands as this setcc
  // -- but in reverse order -- then try to commute the operands of this setcc
  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
  // instruction on some targets.
  if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
      (DCI.isBeforeLegalizeOps() ||
       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
      DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
      !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);

  if (auto *N1C = isConstOrConstSplat(N1)) {
    const APInt &C1 = N1C->getAPIntValue();

    // Optimize some CTPOP cases.
    if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
      return V;

    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
    // equality comparison, then we're just comparing whether X itself is
    // zero.
    if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
        N0.getOperand(0).getOpcode() == ISD::CTLZ &&
        isPowerOf2_32(N0.getScalarValueSizeInBits())) {
      if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
        if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
            ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
          if ((C1 == 0) == (Cond == ISD::SETEQ)) {
            // (srl (ctlz x), 5) == 0  -> X != 0
            // (srl (ctlz x), 5) != 1  -> X != 0
            Cond = ISD::SETNE;
          } else {
            // (srl (ctlz x), 5) != 0  -> X == 0
            // (srl (ctlz x), 5) == 1  -> X == 0
            Cond = ISD::SETEQ;
          }
          SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
          return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
                              Cond);
        }
      }
    }
  }

  // FIXME: Support vectors.
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const APInt &C1 = N1C->getAPIntValue();

    // (zext x) == C --> x == (trunc C)
    // (sext x) == C --> x == (trunc C)
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        DCI.isBeforeLegalize() && N0->hasOneUse()) {
      unsigned MinBits = N0.getValueSizeInBits();
      SDValue PreExt;
      bool Signed = false;
      if (N0->getOpcode() == ISD::ZERO_EXTEND) {
        // ZExt
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
      } else if (N0->getOpcode() == ISD::AND) {
        // DAGCombine turns costly ZExts into ANDs
        if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
          if ((C->getAPIntValue() + 1).isPowerOf2()) {
            MinBits = C->getAPIntValue().countTrailingOnes();
            PreExt = N0->getOperand(0);
          }
      } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
        // SExt
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
        Signed = true;
      } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
        // ZEXTLOAD / SEXTLOAD
        if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
          Signed = true;
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        }
      }

      // Figure out how many bits we need to preserve this constant.
      unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();

      // Make sure we're not losing bits from the constant.
      if (MinBits > 0 && MinBits < C1.getBitWidth() && MinBits >= ReqdBits) {
        EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
        if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
          // Will get folded away.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
          if (MinBits == 1 && C1 == 1)
            // Invert the condition.
            return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
          SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
          return DAG.getSetCC(dl, VT, Trunc, C, Cond);
        }

        // If truncating the setcc operands is not desirable, we can still
        // simplify the expression in some cases:
        // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
        // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, setne)    -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, seteq)    -> setcc (x, y, cc)
        // setcc (sext (setcc x, y, cc)), -1, setne)   -> setcc (x, y, inv(cc))
        // setcc (sext (setcc x, y, cc)), -1, seteq)   -> setcc (x, y, cc)
        SDValue TopSetCC = N0->getOperand(0);
        unsigned N0Opc = N0->getOpcode();
        bool SExt = (N0Opc == ISD::SIGN_EXTEND);
        if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
            TopSetCC.getOpcode() == ISD::SETCC &&
            (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
            (isConstFalseVal(N1) ||
             isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
          bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
                         (!N1C->isZero() && Cond == ISD::SETNE);
          if (!Inverse)
            return TopSetCC;

          ISD::CondCode InvCond = ISD::getSetCCInverse(
              cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
              TopSetCC.getOperand(0).getValueType());
          return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
                              TopSetCC.getOperand(1), InvCond);
        }
      }
    }

    // If the LHS is '(and load, const)', the RHS is 0, the test is for
    // equality or unsigned, and all 1 bits of the const are in the same
    // partial word, see if we can shorten the load.
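    // e.g. on a little-endian target, (i32 (and (load p), 0xFF00)) == 0 can be
    // done as an 8-bit load at offset 1 compared against 0.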
    if (DCI.isBeforeLegalize() && !ISD::isSignedIntSetCC(Cond) &&
        N0.getOpcode() == ISD::AND && C1 == 0 && N0.getNode()->hasOneUse() &&
        isa<LoadSDNode>(N0.getOperand(0)) &&
        N0.getOperand(0).getNode()->hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
      APInt bestMask;
      unsigned bestWidth = 0, bestOffset = 0;
      if (Lod->isSimple() && Lod->isUnindexed()) {
        unsigned origWidth = N0.getValueSizeInBits();
        unsigned maskWidth = origWidth;
        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
        // 8 bits, but have to be careful...
        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
          origWidth = Lod->getMemoryVT().getSizeInBits();
        const APInt &Mask = N0.getConstantOperandAPInt(1);
        for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
          for (unsigned offset = 0; offset < origWidth / width; offset++) {
            if (Mask.isSubsetOf(newMask)) {
              if (Layout.isLittleEndian())
                bestOffset = (uint64_t)offset * (width / 8);
              else
                bestOffset = (origWidth / width - offset - 1) * (width / 8);
              bestMask = Mask.lshr(offset * (width / 8) * 8);
              bestWidth = width;
              break;
            }
            newMask <<= width;
          }
        }
      }
      if (bestWidth) {
        EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
        if (newVT.isRound() &&
            shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
          SDValue Ptr = Lod->getBasePtr();
          if (bestOffset != 0)
            Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset),
                                           dl);
          SDValue NewLoad =
              DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
                          Lod->getPointerInfo().getWithOffset(bestOffset),
                          Lod->getOriginalAlign());
          return DAG.getSetCC(dl, VT,
                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
                                          DAG.getConstant(bestMask.trunc(bestWidth),
                                                          dl, newVT)),
                              DAG.getConstant(0LL, dl, newVT), Cond);
        }
      }
    }

    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
      unsigned InSize = N0.getOperand(0).getValueSizeInBits();

      // If the comparison constant has bits in the upper part, the
      // zero-extended value could never match.
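      // e.g. (zext i8 %x to i32) eq 300 is always false: bit 8 of 300 lies
      // outside the zero-extended low 8 bits.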
      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
                                              C1.getBitWidth() - InSize))) {
        switch (Cond) {
        case ISD::SETUGT:
        case ISD::SETUGE:
        case ISD::SETEQ:
          return DAG.getConstant(0, dl, VT);
        case ISD::SETULT:
        case ISD::SETULE:
        case ISD::SETNE:
          return DAG.getConstant(1, dl, VT);
        case ISD::SETGT:
        case ISD::SETGE:
          // True if the sign bit of C1 is set.
          return DAG.getConstant(C1.isNegative(), dl, VT);
        case ISD::SETLT:
        case ISD::SETLE:
          // True if the sign bit of C1 isn't set.
          return DAG.getConstant(C1.isNonNegative(), dl, VT);
        default:
          break;
        }
      }

      // Otherwise, we can perform the comparison with the low bits.
      switch (Cond) {
      case ISD::SETEQ:
      case ISD::SETNE:
      case ISD::SETUGT:
      case ISD::SETUGE:
      case ISD::SETULT:
      case ISD::SETULE: {
        EVT newVT = N0.getOperand(0).getValueType();
        if (DCI.isBeforeLegalizeOps() ||
            (isOperationLegal(ISD::SETCC, newVT) &&
             isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
          EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
          SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);

          SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
                                          NewConst, Cond);
          return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
        }
        break;
      }
      default:
        break; // TODO: be more careful with signed comparisons.
      }
    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
               !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
                                      OpVT)) {
      EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
      EVT ExtDstTy = N0.getValueType();
      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();

      // If the constant doesn't fit into the number of bits for the source of
      // the sign extension, it is impossible for both sides to be equal.
      if (C1.getMinSignedBits() > ExtSrcTyBits)
        return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);

      assert(ExtDstTy == N0.getOperand(0).getValueType() &&
             ExtDstTy != ExtSrcTy && "Unexpected types!");
      APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
      SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
                                   DAG.getConstant(Imm, dl, ExtDstTy));
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(ZextOp.getNode());
      // Otherwise, make this a use of a zext.
      return DAG.getSetCC(dl, VT, ZextOp,
                          DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
    } else if ((N1C->isZero() || N1C->isOne()) &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
      if (N0.getOpcode() == ISD::SETCC &&
          isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
          (N0.getValueType() == MVT::i1 ||
           getBooleanContents(N0.getOperand(0).getValueType()) ==
               ZeroOrOneBooleanContent)) {
        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
        if (TrueWhenTrue)
          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
        // Invert the condition.
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
        if (DCI.isBeforeLegalizeOps() ||
            isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
      }

      if ((N0.getOpcode() == ISD::XOR ||
           (N0.getOpcode() == ISD::AND &&
            N0.getOperand(0).getOpcode() == ISD::XOR &&
            N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
          isOneConstant(N0.getOperand(1))) {
        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
        // can only do this if the top bits are known zero.
        unsigned BitWidth = N0.getValueSizeInBits();
        if (DAG.MaskedValueIsZero(N0,
                                  APInt::getHighBitsSet(BitWidth,
                                                        BitWidth - 1))) {
          // Okay, get the un-inverted input value.
          SDValue Val;
          if (N0.getOpcode() == ISD::XOR) {
            Val = N0.getOperand(0);
          } else {
            assert(N0.getOpcode() == ISD::AND &&
                   N0.getOperand(0).getOpcode() == ISD::XOR);
            // ((X^1)&1)^1 -> X & 1
            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
                              N0.getOperand(0).getOperand(0),
                              N0.getOperand(1));
          }

          return DAG.getSetCC(dl, VT, Val, N1,
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
      } else if (N1C->isOne()) {
        SDValue Op0 = N0;
        if (Op0.getOpcode() == ISD::TRUNCATE)
          Op0 = Op0.getOperand(0);

        if ((Op0.getOpcode() == ISD::XOR) &&
            Op0.getOperand(0).getOpcode() == ISD::SETCC &&
            Op0.getOperand(1).getOpcode() == ISD::SETCC) {
          SDValue XorLHS = Op0.getOperand(0);
          SDValue XorRHS = Op0.getOperand(1);
          // Ensure that the input setccs return an i1 type or 0/1 value.
          if (Op0.getValueType() == MVT::i1 ||
              (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
                   ZeroOrOneBooleanContent &&
               getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
                   ZeroOrOneBooleanContent)) {
            // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
            Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
            return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
          }
        }
        if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
          // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
          if (Op0.getValueType().bitsGT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::TRUNCATE, dl, VT,
                                          Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));
          else if (Op0.getValueType().bitsLT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                          Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));

          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
        if (Op0.getOpcode() == ISD::AssertZext &&
            cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
      }
    }

    // Given:
    //   icmp eq/ne (urem %x, %y), 0
    // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
    //   icmp eq/ne %x, 0
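    // (Every divisor of a power of two is itself a power of two, so a %y with
    // two or more bits set can evenly divide %x only when %x is zero.)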
    if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
        (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
      KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
      if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
        return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
    }

    // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
    // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
        N1C && N1C->isAllOnes()) {
      return DAG.getSetCC(dl, VT, N0.getOperand(0),
                          DAG.getConstant(0, dl, OpVT),
                          Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
    }

    if (SDValue V =
            optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
      return V;
  }

  // These simplifications apply to splat vectors as well.
  // TODO: Handle more splat vector cases.
  if (auto *N1C = isConstOrConstSplat(N1)) {
    const APInt &C1 = N1C->getAPIntValue();

    APInt MinVal, MaxVal;
    unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
    if (ISD::isSignedIntSetCC(Cond)) {
      MinVal = APInt::getSignedMinValue(OperandBitSize);
      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
    } else {
      MinVal = APInt::getMinValue(OperandBitSize);
      MaxVal = APInt::getMaxValue(OperandBitSize);
    }

    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
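    // e.g. i32 X sge 5 becomes X sgt 4 (and X sge MIN folds to true below).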
    if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
      // X >= MIN --> true
      if (C1 == MinVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      if (!VT.isVector()) { // TODO: Support this for vectors.
        // X >= C0 --> X > (C0 - 1)
        APInt C = C1 - 1;
        ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }

    if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
      // X <= MAX --> true
      if (C1 == MaxVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      // X <= C0 --> X < (C0 + 1)
      if (!VT.isVector()) { // TODO: Support this for vectors.
        APInt C = C1 + 1;
        ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }

    if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
      if (C1 == MinVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setlt X, Max --> setne X, Max
        if (C1 == MaxVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setult X, 1, turn it into seteq X, 0
        if (C1 == MinVal + 1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MinVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }

    if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
      if (C1 == MaxVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setgt X, Min --> setne X, Min
        if (C1 == MinVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setugt X, Max-1, turn it into seteq X, Max
        if (C1 == MaxVal - 1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MaxVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }

    if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
      // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
      if (C1.isZero())
        if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
                VT, N0, N1, Cond, DCI, dl))
          return CC;

      // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
      // For example, when high 32-bits of i64 X are known clear:
      // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
      // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
      bool CmpZero = N1C->getAPIntValue().isZero();
      bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
      if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
        // Match or(lo,shl(hi,bw/2)) pattern.
        auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
          unsigned EltBits = V.getScalarValueSizeInBits();
          if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
            return false;
          SDValue LHS = V.getOperand(0);
          SDValue RHS = V.getOperand(1);
          APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
          // The unshifted element must have zero upper bits.
          if (RHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(RHS.getOperand(1)) &&
              RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(LHS, HiBits)) {
            Lo = LHS;
            Hi = RHS.getOperand(0);
            return true;
          }
          if (LHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(LHS.getOperand(1)) &&
              LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(RHS, HiBits)) {
            Lo = RHS;
            Hi = LHS.getOperand(0);
            return true;
          }
          return false;
        };
        auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
          unsigned EltBits = N0.getScalarValueSizeInBits();
          unsigned HalfBits = EltBits / 2;
          APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
          SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
          SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
          SDValue NewN0 =
              DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
          SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
          return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
        };

        SDValue Lo, Hi;
        if (IsConcat(N0, Lo, Hi))
          return MergeConcat(Lo, Hi);

        if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
          SDValue Lo0, Lo1, Hi0, Hi1;
          if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
              IsConcat(N0.getOperand(1), Lo1, Hi1)) {
            return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
                               DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
          }
        }
      }
    }

    // If we have "setcc X, C0", check to see if we can shrink the immediate
    // by changing cc.
    // TODO: Support this for vectors after legalize ops.
    if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
      // SETUGT X, SINTMAX -> SETLT X, 0
      // SETUGE X, SINTMIN -> SETLT X, 0
      if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
          (Cond == ISD::SETUGE && C1.isMinSignedValue()))
        return DAG.getSetCC(dl, VT, N0,
                            DAG.getConstant(0, dl, N1.getValueType()),
                            ISD::SETLT);

      // SETULT X, SINTMIN -> SETGT X, -1
      // SETULE X, SINTMAX -> SETGT X, -1
      if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
          (Cond == ISD::SETULE && C1.isMaxSignedValue()))
        return DAG.getSetCC(dl, VT, N0,
                            DAG.getAllOnesConstant(dl, N1.getValueType()),
                            ISD::SETGT);
    }
  }
  // Back to non-vector simplifications.
  // TODO: Can we do these for vector splats?
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    const APInt &C1 = N1C->getAPIntValue();
    EVT ShValTy = N0.getValueType();

    // Fold bit comparisons when we can. This will result in an
    // incorrect value when boolean false is negative one, unless
    // the bitsize is 1 in which case the false value is the same
    // in practice regardless of the representation.
    if ((VT.getSizeInBits() == 1 ||
         getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
        (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
        N0.getOpcode() == ISD::AND) {
      if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        EVT ShiftTy =
            getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
        if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0  -->  (X & 8) >> 3
          // Perform the xform if the AND RHS is a single bit.
          unsigned ShCt = AndRHS->getAPIntValue().logBase2();
          if (AndRHS->getAPIntValue().isPowerOf2() &&
              !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
                               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                                           DAG.getConstant(ShCt, dl, ShiftTy)));
          }
        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
          // (X & 8) == 8  -->  (X & 8) >> 3
          // Perform the xform if C1 is a single bit.
          unsigned ShCt = C1.logBase2();
          if (C1.isPowerOf2() &&
              !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
                               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                                           DAG.getConstant(ShCt, dl, ShiftTy)));
          }
        }
      }
    }

    if (C1.getMinSignedBits() <= 64 &&
        !isLegalICmpImmediate(C1.getSExtValue())) {
      EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
      // (X & -256) == 256 -> (X >> 8) == 1
      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
          N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
        if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          const APInt &AndRHSC = AndRHS->getAPIntValue();
          if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
            unsigned ShiftBits = AndRHSC.countTrailingZeros();
            if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
              SDValue Shift =
                  DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
                              DAG.getConstant(ShiftBits, dl, ShiftTy));
              SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
              return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
            }
          }
        }
      } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
                 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
        bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
        // X <  0x100000000 -> (X >> 32) <  1
        // X >= 0x100000000 -> (X >> 32) >= 1
        // X <= 0x0ffffffff -> (X >> 32) <  1
        // X >  0x0ffffffff -> (X >> 32) >= 1
        unsigned ShiftBits;
        APInt NewC = C1;
        ISD::CondCode NewCond = Cond;
        if (AdjOne) {
          ShiftBits = C1.countTrailingOnes();
          NewC = NewC + 1;
          NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
        } else {
          ShiftBits = C1.countTrailingZeros();
        }
        NewC.lshrInPlace(ShiftBits);
        if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
            isLegalICmpImmediate(NewC.getSExtValue()) &&
            !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
          SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                                      DAG.getConstant(ShiftBits, dl, ShiftTy));
          SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
          return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
        }
      }
    }
  }
  if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
    auto *CFP = cast<ConstantFPSDNode>(N1);
    assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");

    // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
    // constant if knowing that the operand is non-NaN is enough. We prefer to
    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
    // materialize 0.0.
    if (Cond == ISD::SETO || Cond == ISD::SETUO)
      return DAG.getSetCC(dl, VT, N0, N0, Cond);

    // setcc (fneg x), C -> setcc swap(pred) x, -C
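    // e.g. (fneg %x) olt 2.0 becomes %x ogt -2.0.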
    if (N0.getOpcode() == ISD::FNEG) {
      ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
        SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
        return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
      }
    }

    // If the condition is not legal, see if we can find an equivalent one
    // which is legal.
    if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
      // If the comparison was an awkward floating-point == or != and one of
      // the comparison operands is infinity or negative infinity, convert the
      // condition to a less-awkward <= or >=.
      if (CFP->getValueAPF().isInfinity()) {
        bool IsNegInf = CFP->getValueAPF().isNegative();
        ISD::CondCode NewCond = ISD::SETCC_INVALID;
        switch (Cond) {
        case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
        case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
        case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
        case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
        default: break;
        }
        if (NewCond != ISD::SETCC_INVALID &&
            isCondCodeLegal(NewCond, N0.getSimpleValueType()))
          return DAG.getSetCC(dl, VT, N0, N1, NewCond);
      }
    }
  }
  if (N0 == N1) {
    // The sext(setcc()) => setcc() optimization relies on the appropriate
    // constant being emitted.
    assert(!N0.getValueType().isInteger() &&
           "Integer types should be handled by FoldSetCC");

    bool EqTrue = ISD::isTrueWhenEqual(Cond);
    unsigned UOF = ISD::getUnorderedFlavor(Cond);
    if (UOF == 2) // FP operators that are undefined on NaNs.
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
    if (UOF == unsigned(EqTrue))
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
    // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
    // if it is not already.
    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
    if (NewCond != Cond &&
        (DCI.isBeforeLegalizeOps() ||
         isCondCodeLegal(NewCond, N0.getSimpleValueType())))
      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
  }
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
      N0.getValueType().isInteger()) {
    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
        N0.getOpcode() == ISD::XOR) {
      // Simplify (X+Y) == (X+Z) -->  Y == Z
      if (N0.getOpcode() == N1.getOpcode()) {
        if (N0.getOperand(0) == N1.getOperand(0))
          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
        if (N0.getOperand(1) == N1.getOperand(1))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
        if (isCommutativeBinOp(N0.getOpcode())) {
          // If X op Y == Y op X, try other combinations.
          if (N0.getOperand(0) == N1.getOperand(1))
            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
                                Cond);
          if (N0.getOperand(1) == N1.getOperand(0))
            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
                                Cond);
        }
      }

      // If RHS is a legal immediate value for a compare instruction, we need
      // to be careful about increasing register pressure needlessly.
      bool LegalRHSImm = false;

      if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
        if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          // Turn (X+C1) == C2 --> X == C2-C1
          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
            return DAG.getSetCC(dl, VT, N0.getOperand(0),
                                DAG.getConstant(RHSC->getAPIntValue() -
                                                    LHSR->getAPIntValue(),
                                                dl, N0.getValueType()),
                                Cond);
          }

          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
          if (N0.getOpcode() == ISD::XOR)
            // If we know that all of the inverted bits are zero, don't bother
            // performing the inversion.
            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
              return DAG.getSetCC(dl, VT, N0.getOperand(0),
                                  DAG.getConstant(LHSR->getAPIntValue() ^
                                                      RHSC->getAPIntValue(),
                                                  dl, N0.getValueType()),
                                  Cond);
        }

        // Turn (C1-X) == C2 --> X == C1-C2
        if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
            return DAG.getSetCC(dl, VT, N0.getOperand(1),
                                DAG.getConstant(SUBC->getAPIntValue() -
                                                    RHSC->getAPIntValue(),
                                                dl, N0.getValueType()),
                                Cond);
          }
        }

        // Could RHSC fold directly into a compare?
        if (RHSC->getValueType(0).getSizeInBits() <= 64)
          LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
      }

      // (X+Y) == X --> Y == 0 and similar folds.
      // Don't do this if X is an immediate that can fold into a cmp
      // instruction and X+Y has other uses. It could be an induction variable
      // chain, and the transform would increase register pressure.
      if (!LegalRHSImm || N0.hasOneUse())
        if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
          return V;
    }

    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
        N1.getOpcode() == ISD::XOR)
      if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
        return V;

    if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
      return V;
  }
  // Fold remainder of division by a constant.
  if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
      N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

    // When division is cheap or optimizing for minimum size,
    // fall through to DIVREM creation by skipping this fold.
    if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
      if (N0.getOpcode() == ISD::UREM) {
        if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
          return Folded;
      } else if (N0.getOpcode() == ISD::SREM) {
        if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
          return Folded;
      }
    }
  }
  // Fold away ALL boolean setcc's.
  if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
    SDValue Temp;
    switch (Cond) {
    default: llvm_unreachable("Unknown integer setcc!");
    case ISD::SETEQ:  // X == Y  -->  ~(X^Y)
      Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
      N0 = DAG.getNOT(dl, Temp, OpVT);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETNE:  // X != Y  -->  (X^Y)
      N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
      break;
    case ISD::SETGT:  // X >s Y  -->  X == 0 & Y == 1  -->  ~X & Y
    case ISD::SETULT: // X <u Y  -->  X == 0 & Y == 1  -->  ~X & Y
      Temp = DAG.getNOT(dl, N0, OpVT);
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETLT:  // X <s Y  -->  X == 1 & Y == 0  -->  ~Y & X
    case ISD::SETUGT: // X >u Y  -->  X == 1 & Y == 0  -->  ~Y & X
      Temp = DAG.getNOT(dl, N1, OpVT);
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETULE: // X <=u Y  -->  X == 0 | Y == 1  -->  ~X | Y
    case ISD::SETGE:  // X >=s Y  -->  X == 0 | Y == 1  -->  ~X | Y
      Temp = DAG.getNOT(dl, N0, OpVT);
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETUGE: // X >=u Y  -->  X == 1 | Y == 0  -->  ~Y | X
    case ISD::SETLE:  // X <=s Y  -->  X == 1 | Y == 0  -->  ~Y | X
      Temp = DAG.getNOT(dl, N1, OpVT);
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
      break;
    }
    if (VT.getScalarType() != MVT::i1) {
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(N0.getNode());
      // FIXME: If running after legalize, we probably can't do this.
      ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
      N0 = DAG.getNode(ExtendCode, dl, VT, N0);
    }
    return N0;
  }

  // Could not fold it.
  return SDValue();
}

/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
                                    int64_t &Offset) const {
  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}

SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
TargetLowering::getConstraintType(StringRef Constraint) const {
  unsigned S = Constraint.size();

  if (S == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'r':
      return C_RegisterClass;
    case 'm': // memory
    case 'o': // offsetable
    case 'V': // not offsetable
      return C_Memory;
    case 'n': // Simple Integer
    case 'E': // Floating Point Constant
    case 'F': // Floating Point Constant
      return C_Immediate;
    case 'i': // Simple Integer or Relocatable Constant
    case 's': // Relocatable Constant
    case 'p': // Address.
    case 'X': // Allow ANY value.
    case 'I': // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;
    }
  }

  if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
    if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
      return C_Memory;
    return C_Register;
  }
  return C_Unknown;
}

/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
  if (ConstraintVT.isInteger())
    return "r";
  if (ConstraintVT.isFloatingPoint())
    return "f"; // works for many targets
  return nullptr;
}

SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  return SDValue();
}

/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {
  if (Constraint.length() > 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X': // Allows any operand
  case 'i': // Simple Integer or Relocatable Constant
  case 'n': // Simple Integer
  case 's': { // Relocatable Constant

    ConstantSDNode *C;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
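    // e.g. (add (add GA, 4), 8) is emitted as a single target global address
    // with offset GA+12.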
    while (true) {
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended. Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      return;
    }
    break;
  }
  }
}

std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
                                             StringRef Constraint,
                                             MVT VT) const {
  if (Constraint.empty() || Constraint[0] != '{')
    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);

  std::pair<unsigned, const TargetRegisterClass *> R =
      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));

  // Figure out which register class contains this reg.
  for (const TargetRegisterClass *RC : RI->regclasses()) {
    // If none of the value types for this register class are valid, we
    // can't use it. For example, 64-bit reg classes on 32-bit targets.
    if (!isLegalRC(*RI, *RC))
      continue;

    for (const MCPhysReg &PR : *RC) {
      if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
        std::pair<unsigned, const TargetRegisterClass *> S =
            std::make_pair(PR, RC);

        // If this register class has the requested value type, return it,
        // otherwise keep searching and return the first class found
        // if no other is found which explicitly has the requested type.
        if (RI->isTypeLegalForClass(*RC, VT))
          return S;
        if (!R.second)
          R = S;
      }
    }
  }

  return R;
}
//===----------------------------------------------------------------------===//
// Constraint Selection.
//===----------------------------------------------------------------------===//

/// Return true if this is an input operand that is a matching constraint
/// like "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
/// If this is an input matching constraint, this method returns the output
/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return atoi(ConstraintCode.c_str());
}
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up
  // the ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }
    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        OpTy = Call.getAttributes().getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }
      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }

      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
  switch (CT) {
  case TargetLowering::C_Immediate:
  case TargetLowering::C_Other:
  case TargetLowering::C_Unknown:
    return 0;
  case TargetLowering::C_Register:
    return 1;
  case TargetLowering::C_RegisterClass:
    return 2;
  case TargetLowering::C_Memory:
    return 3;
  }
  llvm_unreachable("Invalid constraint type");
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getMultipleConstraintMatchWeight(
    AsmOperandInfo &info, int maIndex) const {
  InlineAsm::ConstraintCodeVector *rCodes;
  if (maIndex >= (int)info.multipleAlternatives.size())
    rCodes = &info.Codes;
  else
    rCodes = &info.multipleAlternatives[maIndex].Codes;
  ConstraintWeight BestWeight = CW_Invalid;

  // Loop over the options, keeping track of the most general one.
  for (const std::string &rCode : *rCodes) {
    ConstraintWeight weight =
        getSingleConstraintMatchWeight(info, rCode.c_str());
    if (weight > BestWeight)
      BestWeight = weight;
  }

  return BestWeight;
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  case 'i': // immediate integer.
  case 'n': // immediate integer with a known value.
    if (isa<ConstantInt>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 's': // non-explicit integral immediate.
    if (isa<GlobalValue>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 'E': // immediate float if host format.
  case 'F': // immediate float.
    if (isa<ConstantFP>(CallOperandVal))
      weight = CW_Constant;
    break;
  case '<': // memory operand with autodecrement.
  case '>': // memory operand with autoincrement.
  case 'm': // memory operand.
  case 'o': // offsettable memory operand
  case 'V': // non-offsettable memory operand
    weight = CW_Memory;
    break;
  case 'r': // general register.
  case 'g': // general register, memory operand or immediate integer.
            // note: Clang converts "g" to "imr".
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;
  case 'X': // any operand.
  default:
    weight = CW_Default;
    break;
  }
  return weight;
}
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
///    Other         -> immediates and magic values
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it. The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory. Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it. This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present. This prefers
///     'm' over 'r', for example.
///
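/// For example, given "imr" on x86: an operand that folds to the constant
/// 42 satisfies 'i' and is used directly (rule 1), while a run-time value
/// falls through to rule 2 and picks 'm', the most general of the remaining
/// constraint classes.
///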
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
                             const TargetLowering &TLI,
                             SDValue Op, SelectionDAG *DAG) {
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  unsigned BestIdx = 0;
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  int BestGenerality = -1;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
    TargetLowering::ConstraintType CType =
        TLI.getConstraintType(OpInfo.Codes[i]);

    // Indirect 'other' or 'immediate' constraints are not allowed.
    if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
                               CType == TargetLowering::C_Register ||
                               CType == TargetLowering::C_RegisterClass))
      continue;

    // If this is an 'other' or 'immediate' constraint, see if the operand is
    // valid for it. For example, on X86 we might have an 'rI' constraint. If
    // the operand is an integer in the range [0..31] we want to use I (saving a
    // load of a register), otherwise we must use 'r'.
    if ((CType == TargetLowering::C_Other ||
         CType == TargetLowering::C_Immediate) && Op.getNode()) {
      assert(OpInfo.Codes[i].size() == 1 &&
             "Unhandled multi-letter 'other' constraint");
      std::vector<SDValue> ResultOps;
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
                                       ResultOps, *DAG);
      if (!ResultOps.empty()) {
        BestType = CType;
        BestIdx = i;
        break;
      }
    }

    // Things with matching constraints can only be registers, per gcc
    // documentation. This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    // This constraint letter is more general than the previous one, use it.
    int Generality = getConstraintGenerality(CType);
    if (Generality > BestGenerality) {
      BestType = CType;
      BestIdx = i;
      BestGenerality = Generality;
    }
  }

  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  OpInfo.ConstraintType = BestType;
}
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    ChooseConstraint(OpInfo, *this, Op, DAG);
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere. For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
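/// For example, an exact i32 'sdiv X, 6' becomes 'mul (sra X, 1), 0xAAAAAAAB':
/// the exact flag guarantees X is a multiple of 6, so the arithmetic shift
/// divides by 2 losslessly, and 0xAAAAAAAB is the multiplicative inverse of
/// the remaining odd factor 3 modulo 2^32 (3 * 0xAAAAAAAB == 1 (mod 2^32)).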
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
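    // Since Divisor is now odd, it is invertible modulo 2^W, and Newton's
    // iteration converges quadratically: if t = Divisor * Factor == 1
    // (mod 2^k), then Factor * (2 - t) is correct modulo 2^(2k). Starting
    // from Factor = Divisor (already correct modulo 2^3 for odd values),
    // the loop below needs only O(log W) steps; e.g. for Divisor = 3 and
    // W = 32 it produces 0xAAAAAAAB.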
    // Calculate the multiplicative inverse, using Newton's method.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                      SelectionDAG &DAG,
                                      SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV
  return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
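/// For example, for i32 'sdiv N, 7' the magic constant is 0x92492493 with a
/// shift of 2: the divisor is positive but the magic value is negative as an
/// i32, so the numerator is added to the MULHS result, which is then shifted
/// right arithmetically by 2, and finally the sign bit of the quotient is
/// added back in to round towards zero.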
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    SignedDivisionByConstantInfo magics =
        SignedDivisionByConstantInfo::get(Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
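/// For example, for i32 'udiv N, 7' the magic constant is 0x24924925 and the
/// expensive "add" fixup (the NPQ path below) is required:
///   Q   = mulhu(N, 0x24924925)
///   NPQ = srl(sub(N, Q), 1)
///   Res = srl(add(NPQ, Q), 2)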
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  bool UseNPQ = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    // FIXME: We should use a narrower constant when the upper
    // bits are known to be zero.
    const APInt &Divisor = C->getAPIntValue();
    UnsignedDivisonByConstantInfo magics =
        UnsignedDivisonByConstantInfo::get(Divisor);
    unsigned PreShift = 0, PostShift = 0;

    // If the divisor is even, we can avoid using the expensive fixup by
    // shifting the divided value upfront.
    if (magics.IsAdd != 0 && !Divisor[0]) {
      PreShift = Divisor.countTrailingZeros();
      // Get magic number for the shifted divisor.
      magics =
          UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
      assert(magics.IsAdd == 0 && "Should use cheap fixup now");
    }
    APInt Magic = magics.Magic;

    bool SelNPQ;
    if (magics.IsAdd == 0 || Divisor.isOne()) {
      assert(magics.ShiftAmount < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      PostShift = magics.ShiftAmount;
      SelNPQ = false;
    } else {
      PostShift = magics.ShiftAmount - 1;
      SelNPQ = true;
    }
    PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
    MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
    NPQFactors.push_back(
        DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
                               : APInt::getZero(EltBits),
                        dl, SVT));
    PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
    UseNPQ |= SelNPQ;
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
  Created.push_back(Q.getNode());

  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
  Created.push_back(Q.getNode());

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
/// If all values in Values that *don't* match the predicate are the same
/// 'splat' value, then replace all values with that splat value.
/// Else, if AlternativeReplacement was provided, then replace all values that
/// do match predicate with AlternativeReplacement value.
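/// For example, with Predicate = isNullConstant, <2, 0, 2, 0> becomes
/// <2, 2, 2, 2>, while <2, 0, 3, 0> is left unchanged (there is no unique
/// splat value) unless an AlternativeReplacement is supplied for the zeros.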
static void
turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
                          std::function<bool(SDValue)> Predicate,
                          SDValue AlternativeReplacement = SDValue()) {
  SDValue Replacement;
  // Is there a value for which the Predicate does *NOT* match? What is it?
  auto SplatValue = llvm::find_if_not(Values, Predicate);
  if (SplatValue != Values.end()) {
    // Does Values consist only of SplatValue's and values matching Predicate?
    if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
          return Value == *SplatValue || Predicate(Value);
        })) // Then we shall replace values matching predicate with SplatValue.
      Replacement = *SplatValue;
  }
  if (!Replacement) {
    // Oops, we did not find the "baseline" splat value.
    if (!AlternativeReplacement)
      return; // Nothing to do.
    // Let's replace with provided value then.
    Replacement = AlternativeReplacement;
  }
  std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
}
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
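/// For example, for i32 'setcc (urem N, 6), 0, seteq' we have D0 = 3 and
/// K = 1, so P = 0xAAAAAAAB (the inverse of 3 modulo 2^32) and
/// Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA, and the fold produces
/// 'setcc (rotr (mul N, 0xAAAAAAAB), 1), 0x2AAAAAAA, setule'.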
SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                        SDValue CompTargetNode,
                                        ISD::CondCode Cond,
                                        DAGCombinerInfo &DCI,
                                        const SDLoc &DL) const {
  SmallVector<SDNode *, 5> Built;
  if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
                                         DCI, DL, Built)) {
    for (SDNode *N : Built)
      DCI.AddToWorklist(N);
    return Folded;
  }

  return SDValue();
}
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;

  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isZero())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isZero();
    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isZero())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(APInt::getAllOnes(W), D, Q, R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");
    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amounts to bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }
    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();
  // If this is a urem by a power-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();
  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0's.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTORs");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }
  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.

  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());
  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops - legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the NOTE above the VSELECT check.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
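/// For example, for i32 'setcc (srem N, 3), 0, seteq' we get P = 0xAAAAAAAB,
/// A = floor((2^31 - 1) / 3) = 0x2AAAAAAA and Q = 2 * A = 0x55555554, so the
/// fold produces 'setcc (add (mul N, 0xAAAAAAAB), 0x2AAAAAAA), 0x55555554,
/// setule' (no rotate is needed since K = 0 for an odd divisor).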
SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                        SDValue CompTargetNode,
                                        ISD::CondCode Cond,
                                        DAGCombinerInfo &DCI,
                                        const SDLoc &DL) const {
  SmallVector<SDNode *, 7> Built;
  if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
                                         DCI, DL, Built)) {
    assert(Built.size() <= 7 && "Max size prediction failed.");
    for (SDNode *N : Built)
      DCI.AddToWorklist(N);
    return Folded;
  }

  return SDValue();
}
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();
  // If we are after ops legalization, and MUL is unavailable, we cannot
  // proceed.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isZero())
    return SDValue();
  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isZero())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`

    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOne();
    AllDivisorsAreOnes &= D.isOne();

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");
    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }
    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();
  // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();
  SDValue PVal, AVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0's.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
           QAmts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    assert(isa<ConstantSDNode>(D) && "Expected a constant");
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();
    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops - legalization has a hard time producing good code for the code that
  // follows.
  if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isOperationLegalOrCustom(Cond, VT) ||
      !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
                                MaskedIsZero, Fold);

  return Blended;
}
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_return_address' must "
                                "be a constant integer");
    return true;
  }

  return false;
}
SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
                                         const DenormalMode &Mode) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);

  // Test the input against denormals to avoid a wrong estimate.
  if (Mode.Input == DenormalMode::IEEE) {
    // This is specifically a check for the handling of denormal inputs,
    // not the result.

    // Test = fabs(X) < SmallestNormal
    const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
    APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
    SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
    SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
    return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
  }

  // Test = X == 0.0
  return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes, we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an
  // identical node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target
    // says the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even if it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate X if its cost is less than or equal to the cost of negating Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate X if its cost is less than or equal to the cost of negating Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if we fail to negate Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate X if its cost is less than or equal to the cost of negating Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  }

  return SDValue();
}
//===----------------------------------------------------------------------===//
// Legalization Utilities
//===----------------------------------------------------------------------===//

bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
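  // The expansion below is schoolbook multiplication on half-width digits:
  // with n = InnerBitSize, LHS = LH*2^n + LL and RHS = RH*2^n + RL, so
  //   LHS*RHS = LL*RL + (LL*RH + LH*RL)*2^n + LH*RH*2^(2n),
  // where each partial product is computed as a half-width lo/hi pair.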
  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
      DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);

  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
                               SelectionDAG &DAG, MulExpansionKind Kind,
                               SDValue LL, SDValue LH, SDValue RL,
                               SDValue RH) const {
  SmallVector<SDValue, 2> Result;
  bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
                           N->getOperand(0), N->getOperand(1), Result, HiLoVT,
                           DAG, Kind, LL, LH, RL, RH);
  if (Ok) {
    assert(Result.size() == 2);
    Lo = Result[0];
    Hi = Result[1];
  }
  return Ok;
}

// Check that (every element of) Z is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
  return ISD::matchUnaryPredicate(
      Z,
      [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
      true);
}
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
                                          SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);

  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
      // Note: the negation must be computed in the shift-amount type, ShVT.
      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      Z = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Z);
    } else {
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      SDValue One = DAG.getConstant(1, DL, ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
      } else {
        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
      }
      Z = DAG.getNOT(DL, Z, ShVT);
    }
    return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
  }

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
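    // For intuition: with BW = 8 and C = 3, fshl(X, Y, 3) shifts the 16-bit
    // concatenation X:Y left by 3 and keeps the high byte, which is exactly
    // (X << 3) | (Y >> 5).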
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
    }
  }
  return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}
// TODO: Merge with expandFunnelShift.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                  SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, ShVT);

  // If a rotate in the other direction is more supported, use it.
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    return DAG.getNode(RevRot, DL, VT, Op0, Sub);
  }

  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SHL, VT) ||
       !isOperationLegalOrCustom(ISD::SRL, VT) ||
       !isOperationLegalOrCustom(ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return SDValue();

  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
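    // For intuition: with w = 8, rotl(x, 3) = (x << 3) | (x >> 5). Masking
    // with (w - 1) both reduces c modulo w and turns -c into w - c, so an
    // out-of-range amount still selects the correct pair of shifts.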
    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
    SDValue One = DAG.getConstant(1, DL, ShVT);
    HsVal =
        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
  }
  return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
}
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");

  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/SRL nodes don't. Insert an AND to be safe; it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger than or equal to the width of a part we
  // don't use the result from the FSHL/FSHR. Insert a test and select the
  // appropriate values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
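  // Recall the IEEE-754 single-precision layout: bit 31 is the sign, bits
  // 30..23 hold the biased exponent (bias 127), and bits 22..0 hold the
  // mantissa. That is where the 0x7F800000 / 0x007FFFFF masks and the
  // implicit-leading-one constant 0x00800000 below come from.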
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller than the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't a cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False
    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when the rounding mode is
  // set to round toward negative infinity, which will produce -0.0. So
  // disable under strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt. This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will produce
  // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
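  // How the magic constants work: 0x4330000000000000 is the f64 bit pattern
  // of 2^52 and 0x4530000000000000 is 2^84. OR'ing a 32-bit value into the
  // low mantissa bits of those patterns yields, exactly, 2^52 + Lo and
  // 2^84 + Hi * 2^32 as doubles. Subtracting (2^84 + 2^52) from the latter
  // gives Hi * 2^32 - 2^52, and the final fadd cancels the -2^52 term:
  //   (2^52 + Lo) + (Hi * 2^32 - 2^52) == Hi * 2^32 + Lo == Src.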
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  SDValue HiSub =
      DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  return true;
}
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
    ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
  EVT VT = Node->getValueType(0);

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs.
  if (Node->getFlags().hasNoNaNs()) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
    }
  }

  // If none of the above worked, but there are no NaNs, then expand to
  // a compare/select sequence. This is required for correctness since
  // InstCombine might have canonicalized a fcmp+select sequence to a
  // FMINNUM/FMAXNUM node. If we were to fall through to the default
  // expansion to libcall, we might introduce a link-time dependency
  // on libm into a file that originally did not have one.
  if (Node->getFlags().hasNoNaNs()) {
    ISD::CondCode Pred =
        Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
    SDValue Op1 = Node->getOperand(0);
    SDValue Op2 = Node->getOperand(1);
    SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
    // Copy FMF flags, but always set the no-signed-zeros flag
    // as this is implied by the FMINNUM/FMAXNUM semantics.
    SDNodeFlags Flags = Node->getFlags();
    Flags.setNoSignedZeros(true);
    SelCC->setFlags(Flags);
    return SelCC;
  }

  return SDValue();
}
// Only expand vector types if we have the appropriate vector bit operations.
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
  assert(VT.isVector() && "Expected vector type");
  unsigned Len = VT.getScalarSizeInBits();
  return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
         TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
         TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
         (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
         TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
}

SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
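  // Worked example on one byte, v = 0b11011010 (popcount 5):
  //   v - ((v >> 1) & 0x55)           -> 0b10010101 (2-bit field sums 2,1,1,1)
  //   (v & 0x33) + ((v >> 2) & 0x33)  -> 0b00110010 (4-bit field sums 3,2)
  //   (v + (v >> 4)) & 0x0F           -> 5
  // The final multiply by 0x01...01 sums all byte counts into the top byte,
  // which the shift by (Len - 8) extracts.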
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
  SDValue Mask01 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);
  // v = (v * 0x01010101...) >> (Len - 8)
  if (Len > 8)
    Op =
        DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                    DAG.getConstant(Len - 8, dl, ShVT));
  return Op;
}
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTLZ, VT))
    return DAG.getNode(ISD::CTLZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
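  // The OR cascade smears the highest set bit into every position below it,
  // e.g. for 16-bit x = 0x0310 it produces 0x03FF. Inverting then leaves set
  // bits only above the original leading one, so popcount(~x) = 6 = ctlz(x).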
  for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  Op = DAG.getNOT(dl, Op, VT);
  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT))
    return DAG.getNode(ISD::CTTZ, dl, VT, Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    return DAG.getSelect(dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(*this, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
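  // ~x & (x - 1) turns the trailing zeros of x into ones and clears the rest,
  // e.g. x = 0b101000 gives x - 1 = 0b100111, ~x = ...010111, and the AND is
  // 0b000111, whose popcount (3) is exactly the trailing-zero count.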
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                       DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
  }
  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
                                  bool IsNegative) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = N->getOperand(0);

  // abs(x) -> smax(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::SMAX, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    return DAG.getNode(ISD::SMAX, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // abs(x) -> umin(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::UMIN, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    return DAG.getNode(ISD::UMIN, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // 0 - abs(x) -> smin(x, sub(0,x))
  if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::SMIN, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    return DAG.getNode(ISD::SMIN, dl, VT, Op,
                       DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
  }

  // Only expand vector types if we have the appropriate vector operations.
  if (VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SRA, VT) ||
       (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
       (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
       !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return SDValue();
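  // The fallback is the classic branchless abs: Y = x >>s (w - 1) is 0 for
  // non-negative x and all-ones for negative x, so (x + Y) ^ Y leaves
  // non-negative x unchanged and maps negative x to (x - 1) ^ -1 = -x.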
  SDValue Shift =
      DAG.getNode(ISD::SRA, dl, VT, Op,
                  DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
  if (!IsNegative) {
    SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
    return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
  }

  // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
  return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(0xFF0000, dl, VT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
                       DAG.getConstant(255ULL << 48, dl, VT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
                       DAG.getConstant(255ULL << 40, dl, VT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
                       DAG.getConstant(255ULL << 32, dl, VT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL << 24, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL << 16, dl, VT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL << 8, dl, VT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then mask+swap the i4 pairs, then the
  // i2 pairs and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
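  // Worked example on one byte, 0b11010010 -> 0b01001011:
  //   swap nibbles:       0b00101101
  //   swap 2-bit pairs:   0b10000111
  //   swap adjacent bits: 0b01001011
  // For wider types, the leading BSWAP reverses the bytes first.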
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz - 1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    APInt Shift(Sz, 1);
    Shift <<= J;
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }
  return Tmp;
}
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depends on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
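  // For example, a <4 x i4> load is handled below as a single i16 extending
  // load, with each element recovered by a shift and mask rather than four
  // separate sub-byte memory accesses.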
  6624. if (!SrcEltVT.isByteSized()) {
  6625. unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
  6626. EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
  6627. unsigned NumSrcBits = SrcVT.getSizeInBits();
  6628. EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
  6629. unsigned SrcEltBits = SrcEltVT.getSizeInBits();
  6630. SDValue SrcEltBitMask = DAG.getConstant(
  6631. APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
  6632. // Load the whole vector and avoid masking off the top bits as it makes
  6633. // the codegen worse.
  6634. SDValue Load =
  6635. DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
  6636. LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
  6637. LD->getMemOperand()->getFlags(), LD->getAAInfo());
  6638. SmallVector<SDValue, 8> Vals;
  6639. for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
  6640. unsigned ShiftIntoIdx =
  6641. (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
  6642. SDValue ShiftAmount =
  6643. DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
  6644. LoadVT, SL, /*LegalTypes=*/false);
  6645. SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
  6646. SDValue Elt =
  6647. DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
  6648. SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
  6649. if (ExtType != ISD::NON_EXTLOAD) {
  6650. unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
  6651. Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
  6652. }
  6653. Vals.push_back(Scalar);
  6654. }
  6655. SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
  6656. return std::make_pair(Value, Load.getValue(1));
  6657. }
  6658. unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  6659. assert(SrcEltVT.isByteSized());
  6660. SmallVector<SDValue, 8> Vals;
  6661. SmallVector<SDValue, 8> LoadChains;
  6662. for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
  6663. SDValue ScalarLoad =
  6664. DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
  6665. LD->getPointerInfo().getWithOffset(Idx * Stride),
  6666. SrcEltVT, LD->getOriginalAlign(),
  6667. LD->getMemOperand()->getFlags(), LD->getAAInfo());
  6668. BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
  6669. Vals.push_back(ScalarLoad.getValue(0));
  6670. LoadChains.push_back(ScalarLoad.getValue(1));
  6671. }
  6672. SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  6673. SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
  6674. return std::make_pair(Value, NewChain);
  6675. }
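
// A minimal usage sketch (illustrative only; the v4i8 type is an assumption,
// not taken from this file): for a byte-sized-element load such as v4i8, the
// path above emits four scalar loads at byte offsets 0..3, a TokenFactor
// joining their chains, and a BUILD_VECTOR of the four values, roughly
//   SDValue E0 = DAG.getExtLoad(ExtType, SL, MVT::i8, Chain, Ptr, ...);
//   ...
//   SDValue V  = DAG.getBuildVector(MVT::v4i8, SL, {E0, E1, E2, E3});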

SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depends on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // byte-sized elements must therefore be stored as an integer built out of
  // the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);

      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store stride in bytes.
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");

  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
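
// Illustrative note (not from upstream): for a sub-byte element type such as
// v4i1, the non-byte-sized path above packs the elements into a single i4
// integer with ZERO_EXTEND/SHL/OR before storing, so the in-memory layout
// matches what a bitcast of the vector to an integer would produce; a v4i8
// store instead takes the byte-sized path and emits four truncating stores
// joined by a TokenFactor.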

std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to an (aligned) stack slot using (unaligned) integer
    // loads and stores, then do an (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();

    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copy using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot. Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial. Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot. Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one. This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // Aggregate the two parts.
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));

  return std::make_pair(Result, TF);
}
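
// A worked example (illustrative, assuming a strict-alignment target where a
// misaligned i32 load is unsupported): the integer half-splitting path above
// turns a 2-byte-aligned i32 load into two zext i16 loads plus a combine:
//   Lo = zextload i16 [Ptr]; Hi = zextload i16 [Ptr + 2]   // little-endian
//   Result = (Hi << 16) | Lo
// with the two load chains joined by a TokenFactor.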

SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do an (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copy using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location. Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial. Do a truncating store. On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT.
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount = DAG.getConstant(
      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
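
// A worked example (illustrative, assuming a strict-alignment target): the
// half-splitting path above turns a 2-byte-aligned i32 store of Val into
//   truncstore i16 (Val)        -> [Ptr]       // little-endian: low half
//   truncstore i16 (Val >> 16)  -> [Ptr + 2]   //               high half
// with the two stores joined by a TokenFactor; big-endian swaps the halves.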

SDValue
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
                                       const SDLoc &DL, EVT DataVT,
                                       SelectionDAG &DAG,
                                       bool IsCompressedMemory) const {
  SDValue Increment;
  EVT AddrVT = Addr.getValueType();
  EVT MaskVT = Mask.getValueType();
  assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
         "Incompatible types of Data and Mask");
  if (IsCompressedMemory) {
    if (DataVT.isScalableVector())
      report_fatal_error(
          "Cannot currently handle compressed memory with scalable vectors");
    // Increment the pointer according to the number of '1's in the mask.
    EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
    SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
    if (MaskIntVT.getSizeInBits() < 32) {
      MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
      MaskIntVT = MVT::i32;
    }

    // Count '1's with POPCNT.
    Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
    Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
    // Scale is an element size in bytes.
    SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
                                    AddrVT);
    Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
  } else if (DataVT.isScalableVector()) {
    Increment = DAG.getVScale(DL, AddrVT,
                              APInt(AddrVT.getFixedSizeInBits(),
                                    DataVT.getStoreSize().getKnownMinSize()));
  } else
    Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);

  return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}
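
// Illustrative note (not from upstream): for a compressed v8i32 store with
// mask 0b00001101, the code above computes
//   Increment = ctpop(mask) * 4 = 3 * 4 = 12 bytes
// so the returned address points just past the three elements actually
// written.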

static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
                                       EVT VecVT, const SDLoc &dl,
                                       ElementCount SubEC) {
  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
         "Cannot index a scalable vector within a fixed-width vector");

  unsigned NElts = VecVT.getVectorMinNumElements();
  unsigned NumSubElts = SubEC.getKnownMinValue();
  EVT IdxVT = Idx.getValueType();

  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know the value plus the number of
    // elements in the subvector minus one is less than the minimum number of
    // elements, then it's safe to return Idx.
    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
        return Idx;
    SDValue VS =
        DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
    SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
                              DAG.getConstant(NumSubElts, dl, IdxVT));
    return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
  }

  if (isPowerOf2_32(NElts) && NumSubElts == 1) {
    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
                       DAG.getConstant(Imm, dl, IdxVT));
  }

  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
                     DAG.getConstant(MaxIndex, dl, IdxVT));
}
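
// Illustrative note (not from upstream): for a fixed v8i32 vector and a
// single-element subvector, NElts = 8 is a power of two, so the index is
// clamped with a mask rather than a compare:
//   Idx & 0b111
// while non-power-of-two or multi-element fixed cases fall back to
//   umin(Idx, NElts - NumSubElts).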

SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
                                                SDValue VecPtr, EVT VecVT,
                                                SDValue Index) const {
  return getVectorSubVecPointer(
      DAG, VecPtr, VecVT,
      EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
      Index);
}

SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                  SubVecVT.getVectorElementCount());

  EVT IdxVT = Index.getValueType();
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));

  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
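
// A minimal usage sketch (illustrative only): for a fixed v4i32 in memory,
// the element pointer for index Idx reduces to
//   VecPtr + (Idx & 3) * 4
// i.e. the clamped index scaled by the element's store size in bytes.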

//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//

SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to the address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
  // calls. At least for X86 targets; maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
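
// Illustrative note (not from upstream): for IR along the lines of
//   @x = thread_local global i32 0
// the lowering above materializes the equivalent of the C call
//   void *p = __emutls_get_address(&__emutls_v.x);
// and returns p as the address of the TLS variable.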

SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
  if (!isCtlzFast())
    return SDValue();
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isZero() && CC == ISD::SETEQ) {
      EVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                                DAG.getConstant(Log2b, dl, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }
  }
  return SDValue();
}
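
// A worked example (illustrative): for i32, Log2b = 5, so (x == 0) becomes
//   ctlz(x) >> 5
// which is 1 exactly when ctlz(x) == 32, i.e. when x is zero; narrower
// inputs are first zero-extended to i32 as shown above.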

// Convert redundant addressing modes (e.g. scaling is redundant
// when accessing bytes).
ISD::MemIndexType
TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
                                      SDValue Offsets) const {
  bool IsScaledIndex =
      (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
  bool IsSignedIndex =
      (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);

  // Scaling is unimportant for bytes, canonicalize to unscaled.
  if (IsScaledIndex && MemVT.getScalarType() == MVT::i8)
    return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;

  return IndexType;
}

SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umin(x,y) -> sub(x,usubsat(x,y))
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  ISD::CondCode CC;
  switch (Opcode) {
  default: llvm_unreachable("How did we get here?");
  case ISD::SMAX: CC = ISD::SETGT; break;
  case ISD::SMIN: CC = ISD::SETLT; break;
  case ISD::UMAX: CC = ISD::SETUGT; break;
  case ISD::UMIN: CC = ISD::SETULT; break;
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
  return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}
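
// A minimal sketch (illustrative only; X, Y, and MVT::i32 are assumptions):
// when SUB and USUBSAT are legal, umin(X, Y) is built as
//   SDValue D = DAG.getNode(ISD::USUBSAT, DL, MVT::i32, X, Y); // max(X-Y, 0)
//   SDValue R = DAG.getNode(ISD::SUB, DL, MVT::i32, X, D);     // umin(X, Y)
// since X - max(X - Y, 0) is Y when X > Y and X otherwise.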

SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result =
      DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
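
// A worked example (illustrative): for i8 uadd.sat(200, 100), UADDO yields
// SumDiff = 44 with Overflow = 1, so the select above produces the all-ones
// value 255. In the signed case the saturation value is instead derived from
// the sign of SumDiff, e.g. (SumDiff >> 7) ^ 0x80 gives INT8_MAX for a
// positive-overflowing sum and INT8_MIN for a negative one.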

SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  bool IsSigned = Opcode == ISD::SSHLSAT;
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert((Node->getOpcode() == ISD::SSHLSAT ||
          Node->getOpcode() == ISD::USHLSAT) &&
         "Expected a SHLSAT opcode");
  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.

  unsigned BW = VT.getScalarSizeInBits();
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
  SDValue Orig =
      DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);

  SDValue SatVal;
  if (IsSigned) {
    SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
    SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
    SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
                             SatMin, SatMax, ISD::SETLT);
  } else {
    SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
  }
  Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);

  return Result;
}
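
// A worked example (illustrative): for i8 ushl.sat(0x40, 2), Result = 0x00,
// and shifting back gives Orig = 0x00 != 0x40, so the SETNE select above
// yields the saturation value 0xFF; a non-overflowing shift round-trips to
// the original LHS and keeps Result.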

SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      // Xor the inputs; if the resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow is impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1)).
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturate to SatMin if the wide product is negative, and to SatMax if
    // the wide product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  // We handled Scale == 0 above, so all the bits to examine are in Hi.
  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > ((1 << (Scale - 1)) - 1)).
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
  // which is the same as if (Hi < (-1 << (Scale - 1))).
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);

  return Result;
}
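
// A worked example (illustrative): for an i16 smul.fix with Scale = 8, i.e.
// Q8.8 operands, the product of 1.5 (0x0180) and 2.0 (0x0200) is formed as a
// 32-bit SMUL_LOHI result 0x00030000, and the FSHR by 8 above extracts bits
// [23:8] = 0x0300, which is 3.0 in Q8.8.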

SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true
  // integer division overflow; that is, when you have MIN / -EPS. However,
  // this is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getConstant(LHSShift, dl, ShiftTy));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getConstant(RHSShift, dl, ShiftTy));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}
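
// A worked example (illustrative, assuming a 16-bit container holding Q4.4
// values with enough headroom): dividing 2.0 (0x20) by 0.5 (0x08) with
// Scale = 4 pre-shifts the LHS, giving (0x20 << 4) / 0x08 = 0x200 / 0x08 =
// 0x40, which is 4.0 in Q4.4. When the headroom check above fails, this
// routine returns SDValue() and the caller must widen the type instead.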

void TargetLowering::expandUADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::UADDO;

  // If ADD/SUBCARRY is legal, use that instead.
  unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
    SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
    SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
                                    { LHS, RHS, CarryIn });
    Result = SDValue(NodeCarry.getNode(), 0);
    Overflow = SDValue(NodeCarry.getNode(), 1);
    return;
  }

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT SetCCType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
  ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
  SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
  Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}
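
// Illustrative note (not from upstream): without ADDCARRY, unsigned-add
// overflow is detected purely from the wrapped result, e.g. for i8
//   Result = LHS + RHS;  Overflow = (Result < LHS)   // SETULT
// since an unsigned add overflows exactly when the sum wraps below either
// operand.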

void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegal(OpcSat, LHS.getValueType())) {
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
  SDValue ConditionRHS =
      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);

  Overflow = DAG.getBoolExtOrTrunc(
      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
      ResultType, ResultType);
}
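
// A worked example (illustrative): for i8 saddo(100, 50), Result wraps to
// -106, so ResultLowerThanLHS is true while RHS = 50 is not negative; the
// XOR of the two conditions above is therefore 1, correctly flagging signed
// overflow.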

bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      Overflow = DAG.getSetCC(dl, SetCCVT,
                              DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                          dl, VT, Result, ShiftAmt),
                              LHS, ISD::SETNE);
      return true;
    }
  }

  EVT WideVT =
      EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of the
      // low part.
      unsigned LoSize = VT.getFixedSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
      HiLHS = DAG.getConstant(0, dl, VT);
      HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus having to be split
    // into two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
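
// A worked example (illustrative): for i8 umulo(X, 8), the power-of-two path
// above emits
//   Result = X << 3;  Overflow = ((Result >> 3) != X)
// avoiding any widened multiply; the general path only falls back to a
// MUL_I* libcall when neither MULH* nor *MUL_LOHI nor the double-width type
// is available.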

SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  // Try to use a shuffle reduction for power of two vectors.
  if (VT.isPow2VectorType()) {
    while (VT.getVectorNumElements() > 1) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
        break;

      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
      VT = HalfVT;
    }
  }

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);

  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(0))
    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  return Res;
}
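
// Illustrative note (not from upstream): for a v8i32 add reduction where the
// base opcode is legal at every width, the splitting loop above builds a tree
//   v8i32 -> add two v4i32 halves -> add two v2i32 halves -> v1i32
// so only O(log N) vector adds are emitted instead of N - 1 scalar ones; the
// element-wise tail only handles whatever width the loop stopped at.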

SDValue TargetLowering::expandVecReduceSeq(SDNode *Node,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue AccOp = Node->getOperand(0);
  SDValue VecOp = Node->getOperand(1);
  SDNodeFlags Flags = Node->getFlags();

  EVT VT = VecOp.getValueType();
  EVT EltVT = VT.getVectorElementType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);

  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());

  SDValue Res = AccOp;
  for (unsigned i = 0; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);

  return Res;
}

bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
                               SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  bool isSigned = Node->getOpcode() == ISD::SREM;
  unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
  SDValue Dividend = Node->getOperand(0);
  SDValue Divisor = Node->getOperand(1);
  if (isOperationLegalOrCustom(DivRemOpc, VT)) {
    SDVTList VTs = DAG.getVTList(VT, VT);
    Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
    return true;
  }
  if (isOperationLegalOrCustom(DivOpc, VT)) {
    // X % Y -> X - (X / Y) * Y
    SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
    Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
    return true;
  }
  return false;
}
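
// A minimal sketch (illustrative only; X and Y are placeholders): when only
// division is legal, srem is rebuilt from its definition, e.g.
//   SDValue Q = DAG.getNode(ISD::SDIV, dl, VT, X, Y);
//   SDValue M = DAG.getNode(ISD::MUL, dl, VT, Q, Y);
//   SDValue R = DAG.getNode(ISD::SUB, dl, VT, X, M); // X - (X / Y) * Y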
  7757. SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
  7758. SelectionDAG &DAG) const {
  7759. bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  7760. SDLoc dl(SDValue(Node, 0));
  7761. SDValue Src = Node->getOperand(0);
  7762. // DstVT is the result type, while SatVT is the size to which we saturate
  7763. EVT SrcVT = Src.getValueType();
  7764. EVT DstVT = Node->getValueType(0);
  7765. EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  7766. unsigned SatWidth = SatVT.getScalarSizeInBits();
  7767. unsigned DstWidth = DstVT.getScalarSizeInBits();
  7768. assert(SatWidth <= DstWidth &&
  7769. "Expected saturation width smaller than result width");
  7770. // Determine minimum and maximum integer values and their corresponding
  7771. // floating-point values.
  7772. APInt MinInt, MaxInt;
  7773. if (IsSigned) {
  7774. MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
  7775. MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
  7776. } else {
  7777. MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
  7778. MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
  7779. }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;
    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
                           ISD::CondCode::SETUO);
  }
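
  // Continuing the i8 example: -128.0 and 127.0 are both exact in f32, so a
  // target with legal FMINNUM/FMAXNUM takes this path and emits, roughly:
  //   Clamped = fminnum(fmaxnum(Src, -128.0), 127.0)
  //   Res     = fptosi(Clamped), with NaN selected to 0 in the signed case.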

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
                           ISD::CondCode::SETULT);
  // If Src OGT MaxFloat, select MaxInt.
  Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
                           ISD::CondCode::SETOGT);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
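
// Sketch of the select-based fallback for fptosi.sat f32 -> i8 (illustrative
// pseudo-IR, not what is emitted verbatim):
//   %i = fptosi %src
//   %i = (%src ult -128.0) ? -128 : %i   ; also catches NaN
//   %i = (%src ogt  127.0) ?  127 : %i
//   %r = (%src uno %src)   ?    0 : %i   ; signed case resets NaN to 0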

SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr
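  //
  // E.g. with VL == 4 (for illustration), splice(<a,b,c,d>, <e,f,g,h>, 1)
  // loads <b,c,d,e> starting at element offset 1 of the stored pair, while
  // splice(..., -1) loads <d,e,f,g>: the last element of V1 followed by the
  // head of V2.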

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }
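
  // For example, splicing two nxv4i32 vectors with Imm == 1 loads from byte
  // offset Imm * sizeof(i32) == 4 into the stack slot, yielding elements
  // 1..VL of the concatenated pair.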

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes = DAG.getVScale(
        DL, PtrVT,
        APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
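
// Illustration of the clamp above: for nxv4i32 with Imm == -6, TrailingBytes
// starts out as 24; if vscale happens to be 1 at runtime (VL == 4, 16 bytes
// per vector), the UMIN clamps it to 16 so the load starts exactly at V1
// rather than before the stack slot.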

bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }
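
    // For instance, an integer SETGT where only SETLT is legal is handled by
    // the first rewrite (swap operands: a > b <=> b < a), while SETGE with
    // only SETLT legal needs the second (invert: a >= b <=> !(a < b), with
    // NeedInvert telling the caller to negate the result).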

    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUO is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      LLVM_FALLTHROUGH;
    case ISD::SETO:
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC2, OpVT) &&
          (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
           TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      LLVM_FALLTHROUGH;
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fall through if we are an unsigned integer.
      LLVM_FALLTHROUGH;
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }
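
    // For the floating-point split above, e.g. SETUGT (bit 0x8 set) becomes
    // (LHS SETGT RHS) OR (LHS SETUO RHS), while SETOLT becomes
    // (LHS SETLT RHS) AND (LHS SETO RHS): an ordering-agnostic compare
    // combined with an explicit ordered/unordered test.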

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unordered operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
      SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
      SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
    }
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
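
// In the SETO/SETUO shape above, each operand is compared against itself:
// SETO expands to (LHS SETOEQ LHS) AND (RHS SETOEQ RHS), which holds exactly
// when neither operand is NaN, while SETUO uses (LHS SETUNE LHS) OR
// (RHS SETUNE RHS), or reuses the SETO pattern with NeedInvert set.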