//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pattern matching instruction selector for PowerPC,
// converting from a legalized dag to a PPC dag.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <tuple>
#include <utility>

using namespace llvm;
#define DEBUG_TYPE "ppc-codegen"

STATISTIC(NumSextSetcc,
          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(NumZextSetcc,
          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(SignExtensionsAdded,
          "Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
          "Number of zero extensions for compare inputs added.");
STATISTIC(NumLogicOpsOnComparison,
          "Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
          "Number of compares not eliminated as they have non-extending uses.");
STATISTIC(NumP9Setb,
          "Number of compares lowered to setb.");

// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
                          cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);

static cl::opt<bool>
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
                       cl::desc("use aggressive ppc isel for bit permutations"),
                       cl::Hidden);
static cl::opt<bool> BPermRewriterNoMasking(
    "ppc-bit-perm-rewriter-stress-rotates",
    cl::desc("stress rotate selection in aggressive ppc isel for "
             "bit permutations"),
    cl::Hidden);

static cl::opt<bool> EnableBranchHint(
    "ppc-use-branch-hint", cl::init(true),
    cl::desc("Enable static hinting of branches on ppc"),
    cl::Hidden);

static cl::opt<bool> EnableTLSOpt(
    "ppc-tls-opt", cl::init(true),
    cl::desc("Enable tls optimization peephole"),
    cl::Hidden);

enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
                     ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
                     ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };

static cl::opt<ICmpInGPRType> CmpInGPR(
    "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
    cl::desc("Specify the types of comparisons to emit GPR-only code for."),
    cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
               clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
               clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
               clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
               clEnumValN(ICGPR_NonExtIn, "nonextin",
                          "Only comparisons where inputs don't need [sz]ext."),
               clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
               clEnumValN(ICGPR_ZextI32, "zexti32",
                          "Only i32 comparisons with zext result."),
               clEnumValN(ICGPR_ZextI64, "zexti64",
                          "Only i64 comparisons with zext result."),
               clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
               clEnumValN(ICGPR_SextI32, "sexti32",
                          "Only i32 comparisons with sext result."),
               clEnumValN(ICGPR_SextI64, "sexti64",
                          "Only i64 comparisons with sext result.")));
namespace {

//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
///
class PPCDAGToDAGISel : public SelectionDAGISel {
  const PPCTargetMachine &TM;
  const PPCSubtarget *Subtarget = nullptr;
  const PPCTargetLowering *PPCLowering = nullptr;
  unsigned GlobalBaseReg = 0;

public:
  explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), TM(tm) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Make sure we re-emit a set of the global base reg if necessary
    GlobalBaseReg = 0;
    Subtarget = &MF.getSubtarget<PPCSubtarget>();
    PPCLowering = Subtarget->getTargetLowering();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

  /// getI16Imm - Return a target constant with the specified value, of type
  /// i16.
  inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
  }

  /// getI32Imm - Return a target constant with the specified value, of type
  /// i32.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  /// getI64Imm - Return a target constant with the specified value, of type
  /// i64.
  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  }

  /// getSmallIPtrImm - Return a target constant of pointer type.
  inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(
        Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
  }

  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
  /// rotate and mask opcode and mask operation.
  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                              unsigned &SH, unsigned &MB, unsigned &ME);

  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
  /// base register. Return the virtual register that holds this value.
  SDNode *getGlobalBaseReg();

  void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);

  // Select - Convert the specified operand from a target-independent to a
  // target-specific node if it hasn't already been changed.
  void Select(SDNode *N) override;

  bool tryBitfieldInsert(SDNode *N);
  bool tryBitPermutation(SDNode *N);
  bool tryIntCompareInGPR(SDNode *N);

  // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
  // an X-Form load instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormLoad(LoadSDNode *N);
  // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
  // an X-Form store instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormStore(StoreSDNode *N);

  /// SelectCC - Select a comparison of the specified values with the
  /// specified condition code, returning the CR# of the expression.
  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                   const SDLoc &dl, SDValue Chain = SDValue());

  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
  /// immediate field. Note that the operand at this point is already the
  /// result of a prior SelectAddressRegImm call.
  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
    if (N.getOpcode() == ISD::TargetConstant ||
        N.getOpcode() == ISD::TargetGlobalAddress) {
      Out = N;
      return true;
    }
    return false;
  }

  /// SelectAddrIdx - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is D.
  /// The last parameter \p 0 means the associated D form places no alignment
  /// requirement on its 16-bit signed displacement.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None);
  }

  /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DS.
  /// The last parameter \p 4 means the associated DS form 16-bit signed
  /// displacement must be a multiple of 4.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(4));
  }

  /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DQ.
  /// The last parameter \p 16 means the associated DQ form 16-bit signed
  /// displacement must be a multiple of 16.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrIdxOnly - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation.
  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
  }
  /// SelectAddrImm - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement [r+imm].
  /// The last parameter \p 0 means the D form places no alignment requirement
  /// on its 16-bit signed displacement.
  bool SelectAddrImm(SDValue N, SDValue &Disp,
                     SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None);
  }

  /// SelectAddrImmX4 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 4 (last parameter). Suitable for use by STD and friends.
  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
  }

  /// SelectAddrImmX16 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 16 (last parameter). Suitable for use by STXV and friends.
  bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrImmX34 - Returns true if the address N can be represented by
  /// a base register plus a signed 34-bit displacement. Suitable for use by
  /// PSTXVP and friends.
  bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
  }
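
  // Illustrative note (added for clarity, not part of the original source):
  // these helpers let the TableGen patterns prefer the D/DS/DQ immediate
  // forms and fall back to the indexed X-form when the displacement does not
  // satisfy the alignment constraint. For example, a DS-form access such as
  // `ld r3, 8(r4)` requires the 16-bit displacement to be a multiple of 4, so
  // an offset of 8 is accepted by SelectAddrImmX4 while an offset of 6 is not
  // and would instead be selected through SelectAddrIdx as an indexed `ldx`.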
  // Select an address into a single register.
  bool SelectAddr(SDValue N, SDValue &Base) {
    Base = N;
    return true;
  }

  bool SelectAddrPCRel(SDValue N, SDValue &Base) {
    return PPCLowering->SelectAddressPCRel(N, Base);
  }

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions. It is always correct to compute the value into
  /// a register. The case of adding a (possibly relocatable) constant to a
  /// register can be improved, but it is wrong to substitute Reg+Reg for
  /// Reg in an asm, because the load or store opcode would have to change.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override {
    switch(ConstraintID) {
    default:
      errs() << "ConstraintID: " << ConstraintID << "\n";
      llvm_unreachable("Unexpected asm memory constraint");
    case InlineAsm::Constraint_es:
    case InlineAsm::Constraint_m:
    case InlineAsm::Constraint_o:
    case InlineAsm::Constraint_Q:
    case InlineAsm::Constraint_Z:
    case InlineAsm::Constraint_Zy:
      // We need to make sure that this one operand does not end up in r0
      // (because we might end up lowering this as 0(%op)).
      const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
      const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
      SDLoc dl(Op);
      SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
      SDValue NewOp =
          SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                         dl, Op.getValueType(),
                                         Op, RC), 0);
      OutOps.push_back(NewOp);
      return false;
    }
    return true;
  }

  StringRef getPassName() const override {
    return "PowerPC DAG->DAG Pattern Instruction Selection";
  }

  // Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
  bool trySETCC(SDNode *N);
  bool tryFoldSWTestBRCC(SDNode *N);
  bool tryAsSingleRLDICL(SDNode *N);
  bool tryAsSingleRLDICR(SDNode *N);
  bool tryAsSingleRLWINM(SDNode *N);
  bool tryAsSingleRLWINM8(SDNode *N);
  bool tryAsSingleRLWIMI(SDNode *N);
  bool tryAsPairOfRLDICL(SDNode *N);
  bool tryAsSingleRLDIMI(SDNode *N);

  void PeepholePPC64();
  void PeepholePPC64ZExt();
  void PeepholeCROps();

  SDValue combineToCMPB(SDNode *N);
  void foldBoolExts(SDValue &Res, SDNode *&N);

  bool AllUsersSelectZero(SDNode *N);
  void SwapAllSelectUsers(SDNode *N);

  bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
  void transferMemOperands(SDNode *N, SDNode *Result);
};

} // end anonymous namespace
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
  if (!GlobalBaseReg) {
    const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    const Module *M = MF->getFunction().getParent();
    DebugLoc dl;

    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
      if (Subtarget->isTargetELF()) {
        GlobalBaseReg = PPC::R30;
        if (!Subtarget->isSecurePlt() &&
            M->getPICLevel() == PICLevel::SmallPIC) {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        } else {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
          BuildMI(FirstMBB, MBBI, dl,
                  TII.get(PPC::UpdateGBR), GlobalBaseReg)
              .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        }
      } else {
        GlobalBaseReg =
            RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
      }
    } else {
      // We must ensure that this sequence is dominated by the prologue.
      // FIXME: This is a bit of a big hammer since we don't get the benefits
      // of shrink-wrapping whenever we emit this instruction. Considering
      // this is used in any function where we emit a jump table, this may be
      // a significant limitation. We should consider inserting this in the
      // block where it is used and then commoning this sequence up if it
      // appears in multiple places.
      // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
      // MovePCtoLR8.
      MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
    }
  }
  return CurDAG->getRegister(GlobalBaseReg,
                             PPCLowering->getPointerTy(CurDAG->getDataLayout()))
      .getNode();
}
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
/// operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

/// isInt64Immediate - This method tests to see if the value is a 64-bit
/// constant operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
  return isInt64Immediate(N.getNode(), Imm);
}
static unsigned getBranchHint(unsigned PCC,
                              const FunctionLoweringInfo &FuncInfo,
                              const SDValue &DestMBB) {
  assert(isa<BasicBlockSDNode>(DestMBB));

  if (!FuncInfo.BPI) return PPC::BR_NO_HINT;

  const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
  const Instruction *BBTerm = BB->getTerminator();

  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;

  const BasicBlock *TBB = BBTerm->getSuccessor(0);
  const BasicBlock *FBB = BBTerm->getSuccessor(1);

  auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
  auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);

  // We only want to handle cases that are easy to predict statically, e.g. a
  // C++ throw statement that is very likely not taken, or a call to a function
  // that never returns, such as stdlib exit(). So we set Threshold to filter
  // out the unwanted cases.
  //
  // Below is the LLVM branch weight table; we only want to handle cases 1 and 2.
  //
  //  Case                  Taken:Nontaken  Example
  //  1. Unreachable        1048575:1       C++ throw, stdlib exit(),
  //  2. Invoke-terminating 1:1048575
  //  3. Coldblock          4:64            __builtin_expect
  //  4. Loop Branch        124:4           For loop
  //  5. PH/ZH/FPH          20:12
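  //
  // Illustrative worked example (added for clarity, not in the original
  // comment): with "unreachable" weights 1048575:1 the edge probabilities are
  // roughly 0.999999 and 0.000001, so max/Threshold ~= 0.0001 is still larger
  // than min and a hint is emitted. With __builtin_expect weights 4:64 the
  // probabilities are roughly 0.94 and 0.06, so max/Threshold ~= 0.0001 is
  // smaller than min = 0.06 and the check below returns BR_NO_HINT.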
  const uint32_t Threshold = 10000;
  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
    return PPC::BR_NO_HINT;

  LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
                    << "::" << BB->getName() << "'\n"
                    << " -> " << TBB->getName() << ": " << TProb << "\n"
                    << " -> " << FBB->getName() << ": " << FProb << "\n");

  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);

  // If the destination BasicBlock is the false block (FBB), swap the branch
  // probabilities so that 'TProb' stands for the probability of branching to
  // the destination BasicBlock.
  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
    std::swap(TProb, FProb);

  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
}
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc
         && isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
  SDLoc dl(SN);

  int FI = cast<FrameIndexSDNode>(N)->getIndex();
  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
  if (SN->hasOneUse())
    CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
                         getSmallIPtrImm(Offset, dl));
  else
    ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
                                           getSmallIPtrImm(Offset, dl)));
}
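
// Illustrative note (added for clarity, not part of the original source): a
// frame-index address is materialized as an add-immediate off the frame
// index, so an i64 frame index with Offset 16 becomes roughly
// `ADDI8 <fi#N>, 16`; frame-index elimination later rewrites <fi#N> against
// the actual stack or frame pointer register.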
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
                                      bool isShiftMask, unsigned &SH,
                                      unsigned &MB, unsigned &ME) {
  // Don't even go down this path for i64, since different logic will be
  // necessary for rldicl/rldicr/rldimi.
  if (N->getValueType(0) != MVT::i32)
    return false;

  unsigned Shift = 32;
  unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
  unsigned Opcode = N->getOpcode();
  if (N->getNumOperands() != 2 ||
      !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
    return false;

  if (Opcode == ISD::SHL) {
    // apply shift left to mask if it comes first
    if (isShiftMask) Mask = Mask << Shift;
    // determine which bits are made indeterminant by shift
    Indeterminant = ~(0xFFFFFFFFu << Shift);
  } else if (Opcode == ISD::SRL) {
    // apply shift right to mask if it comes first
    if (isShiftMask) Mask = Mask >> Shift;
    // determine which bits are made indeterminant by shift
    Indeterminant = ~(0xFFFFFFFFu >> Shift);
    // adjust for the left rotate
    Shift = 32 - Shift;
  } else if (Opcode == ISD::ROTL) {
    Indeterminant = 0;
  } else {
    return false;
  }

  // if the mask doesn't intersect any Indeterminant bits
  if (Mask && !(Mask & Indeterminant)) {
    SH = Shift & 31;
    // make sure the mask is still a mask (wrap arounds may not be)
    return isRunOfOnes(Mask, MB, ME);
  }
  return false;
}
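
// Illustrative worked example (added for clarity, not in the original source):
// for N = (srl x, 8) with Mask = 0x00FFFFFF and isShiftMask = false, the SRL
// path sets Indeterminant = ~(0xFFFFFFFF >> 8) = 0xFF000000 and converts the
// shift into a left rotate of 32 - 8 = 24. The mask touches no indeterminant
// bits and 0x00FFFFFF is a contiguous run of ones, so the helper reports
// SH = 24, MB = 8, ME = 31, which are candidate rlwinm/rlwimi operands.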
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  SDValue Base = ST->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = ST->getOffset();
  if (!Offset.isUndef())
    return false;
  if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
    return false;

  SDLoc dl(ST);
  EVT MemVT = ST->getMemoryVT();
  EVT RegVT = ST->getValue().getValueType();
  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8: {
    Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
    break;
  }
  case MVT::i16: {
    Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
    break;
  }
  case MVT::i32: {
    Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
    break;
  }
  case MVT::i64: {
    Opcode = PPC::STDXTLS;
    break;
  }
  }
  SDValue Chain = ST->getChain();
  SDVTList VTs = ST->getVTList();
  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
                   Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(ST, MN);
  ReplaceNode(ST, MN);
  return true;
}

bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  SDValue Base = LD->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = LD->getOffset();
  if (!Offset.isUndef())
    return false;
  if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
    return false;

  SDLoc dl(LD);
  EVT MemVT = LD->getMemoryVT();
  EVT RegVT = LD->getValueType(0);
  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8: {
    Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
    break;
  }
  case MVT::i16: {
    Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
    break;
  }
  case MVT::i32: {
    Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
    break;
  }
  case MVT::i64: {
    Opcode = PPC::LDXTLS;
    break;
  }
  }
  SDValue Chain = LD->getChain();
  SDVTList VTs = LD->getVTList();
  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(LD, MN);
  ReplaceNode(LD, MN);
  return true;
}
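
// Illustrative note (added for clarity, not part of the original source):
// both routines fold the PPCISD::ADD_TLS add into the memory access itself,
// so an access lowered through ADD_TLS becomes, roughly, an indexed form such
// as `lwzx rD, rBase, rOffset`, where the offset operand still carries its
// TLS relocation and the separate add that ADD_TLS would otherwise produce is
// no longer emitted.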
  593. /// Turn an or of two masked values into the rotate left word immediate then
  594. /// mask insert (rlwimi) instruction.
  595. bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
  596. SDValue Op0 = N->getOperand(0);
  597. SDValue Op1 = N->getOperand(1);
  598. SDLoc dl(N);
  599. KnownBits LKnown = CurDAG->computeKnownBits(Op0);
  600. KnownBits RKnown = CurDAG->computeKnownBits(Op1);
  601. unsigned TargetMask = LKnown.Zero.getZExtValue();
  602. unsigned InsertMask = RKnown.Zero.getZExtValue();
  603. if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
  604. unsigned Op0Opc = Op0.getOpcode();
  605. unsigned Op1Opc = Op1.getOpcode();
  606. unsigned Value, SH = 0;
  607. TargetMask = ~TargetMask;
  608. InsertMask = ~InsertMask;
  609. // If the LHS has a foldable shift and the RHS does not, then swap it to the
  610. // RHS so that we can fold the shift into the insert.
  611. if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
  612. if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
  613. Op0.getOperand(0).getOpcode() == ISD::SRL) {
  614. if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
  615. Op1.getOperand(0).getOpcode() != ISD::SRL) {
  616. std::swap(Op0, Op1);
  617. std::swap(Op0Opc, Op1Opc);
  618. std::swap(TargetMask, InsertMask);
  619. }
  620. }
  621. } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
  622. if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
  623. Op1.getOperand(0).getOpcode() != ISD::SRL) {
  624. std::swap(Op0, Op1);
  625. std::swap(Op0Opc, Op1Opc);
  626. std::swap(TargetMask, InsertMask);
  627. }
  628. }
  629. unsigned MB, ME;
  630. if (isRunOfOnes(InsertMask, MB, ME)) {
  631. if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
  632. isInt32Immediate(Op1.getOperand(1), Value)) {
  633. Op1 = Op1.getOperand(0);
  634. SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
  635. }
  636. if (Op1Opc == ISD::AND) {
  637. // The AND mask might not be a constant, and we need to make sure that
  638. // if we're going to fold the masking with the insert, all bits not
  639. // know to be zero in the mask are known to be one.
  640. KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
  641. bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
  642. unsigned SHOpc = Op1.getOperand(0).getOpcode();
  643. if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
  644. isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
  645. // Note that Value must be in range here (less than 32) because
  646. // otherwise there would not be any bits set in InsertMask.
  647. Op1 = Op1.getOperand(0).getOperand(0);
  648. SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
  649. }
  650. }
  651. SH &= 31;
  652. SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
  653. getI32Imm(ME, dl) };
  654. ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
  655. return true;
  656. }
  657. }
  658. return false;
  659. }
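// Return the widest number of low-order bits that the users of N actually
// consume, provided every use is a truncating one (an ISD::TRUNCATE or a
// narrowing store of the value operand); return 0 if any use may need the full
// 64-bit value.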
  660. static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
  661. unsigned MaxTruncation = 0;
  662. // Cannot use range-based for loop here as we need the actual use (i.e. we
  663. // need the operand number corresponding to the use). A range-based for
  664. // will unbox the use and provide an SDNode*.
  665. for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
  666. Use != UseEnd; ++Use) {
  667. unsigned Opc =
  668. Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
  669. switch (Opc) {
  670. default: return 0;
  671. case ISD::TRUNCATE:
  672. if (Use->isMachineOpcode())
  673. return 0;
  674. MaxTruncation =
  675. std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
  676. continue;
  677. case ISD::STORE: {
  678. if (Use->isMachineOpcode())
  679. return 0;
  680. StoreSDNode *STN = cast<StoreSDNode>(*Use);
  681. unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
  682. if (MemVTSize == 64 || Use.getOperandNo() != 0)
  683. return 0;
  684. MaxTruncation = std::max(MaxTruncation, MemVTSize);
  685. continue;
  686. }
  687. case PPC::STW8:
  688. case PPC::STWX8:
  689. case PPC::STWU8:
  690. case PPC::STWUX8:
  691. if (Use.getOperandNo() != 0)
  692. return 0;
  693. MaxTruncation = std::max(MaxTruncation, 32u);
  694. continue;
  695. case PPC::STH8:
  696. case PPC::STHX8:
  697. case PPC::STHU8:
  698. case PPC::STHUX8:
  699. if (Use.getOperandNo() != 0)
  700. return 0;
  701. MaxTruncation = std::max(MaxTruncation, 16u);
  702. continue;
  703. case PPC::STB8:
  704. case PPC::STBX8:
  705. case PPC::STBU8:
  706. case PPC::STBUX8:
  707. if (Use.getOperandNo() != 0)
  708. return 0;
  709. MaxTruncation = std::max(MaxTruncation, 8u);
  710. continue;
  711. }
  712. }
  713. return MaxTruncation;
  714. }
715. // For any 32 < Num < 64, check whether the Imm contains at least Num consecutive
716. // zeros and, if so, return the bit index just above that zero run; return 0 otherwise.
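// For example, Imm = 0x00FF'0000'0000'0FF0 has HiTZ = 16 and LoLZ = 20, i.e. a
// run of 36 zeros spanning bits 12-47, so findContiguousZerosAtLeast(Imm, 33)
// returns 48, the rotate amount that brings the bits above the run down to the
// low end of the value.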
  717. static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
  718. unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
  719. unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
  720. if ((HiTZ + LoLZ) >= Num)
  721. return (32 + HiTZ);
  722. return 0;
  723. }
  724. // Direct materialization of 64-bit constants by enumerated patterns.
  725. static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
  726. uint64_t Imm, unsigned &InstCnt) {
  727. unsigned TZ = countTrailingZeros<uint64_t>(Imm);
  728. unsigned LZ = countLeadingZeros<uint64_t>(Imm);
  729. unsigned TO = countTrailingOnes<uint64_t>(Imm);
  730. unsigned LO = countLeadingOnes<uint64_t>(Imm);
  731. unsigned Hi32 = Hi_32(Imm);
  732. unsigned Lo32 = Lo_32(Imm);
  733. SDNode *Result = nullptr;
  734. unsigned Shift = 0;
  735. auto getI32Imm = [CurDAG, dl](unsigned Imm) {
  736. return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  737. };
738. // Following patterns use 1 instruction to materialize the Imm.
  739. InstCnt = 1;
740. // 1-1) Patterns : {zeros}{15-bit value}
741. // {ones}{15-bit value}
  742. if (isInt<16>(Imm)) {
  743. SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  744. return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  745. }
746. // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
747. // {ones}{15-bit value}{16 zeros}
  748. if (TZ > 15 && (LZ > 32 || LO > 32))
  749. return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
  750. getI32Imm((Imm >> 16) & 0xffff));
  751. // Following patterns use 2 instructions to materialize the Imm.
  752. InstCnt = 2;
  753. assert(LZ < 64 && "Unexpected leading zeros here.");
754. // Count of ones following the leading zeros.
  755. unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
  756. // 2-1) Patterns : {zeros}{31-bit value}
  757. // {ones}{31-bit value}
  758. if (isInt<32>(Imm)) {
  759. uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
  760. unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
  761. Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
  762. return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  763. getI32Imm(Imm & 0xffff));
  764. }
  765. // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
  766. // {zeros}{15-bit value}{zeros}
  767. // {zeros}{ones}{15-bit value}
  768. // {ones}{15-bit value}{zeros}
  769. // We can take advantage of LI's sign-extension semantics to generate leading
770. // ones, and then use RLDIC to mask off the ones on both sides after rotation.
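// For instance, Imm = 0x0000'7FFF'0000'0000 (LZ = 17, FO = 15, TZ = 32) is
// materialized as an LI8 of 0x7FFF followed by an RLDIC with SH = TZ = 32 and
// MB = LZ = 17.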
  771. if ((LZ + FO + TZ) > 48) {
  772. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
  773. getI32Imm((Imm >> TZ) & 0xffff));
  774. return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
  775. getI32Imm(TZ), getI32Imm(LZ));
  776. }
  777. // 2-3) Pattern : {zeros}{15-bit value}{ones}
778. // Shift right the Imm by (48 - LZ) bits to construct a negative 16-bit value,
  779. // therefore we can take advantage of LI's sign-extension semantics, and then
  780. // mask them off after rotation.
  781. //
  782. // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
  783. // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
  784. // +------------------------+ +------------------------+
  785. // 63 0 63 0
  786. // Imm (Imm >> (48 - LZ) & 0xffff)
  787. // +----sext-----|--16-bit--+ +clear-|-----------------+
  788. // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
  789. // +------------------------+ +------------------------+
  790. // 63 0 63 0
  791. // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
  792. if ((LZ + TO) > 48) {
  793. // Since the immediates with (LZ > 32) have been handled by previous
  794. // patterns, here we have (LZ <= 32) to make sure we will not shift right
  795. // the Imm by a negative value.
  796. assert(LZ <= 32 && "Unexpected shift value.");
  797. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
  798. getI32Imm((Imm >> (48 - LZ) & 0xffff)));
  799. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  800. getI32Imm(48 - LZ), getI32Imm(LZ));
  801. }
  802. // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
  803. // {ones}{15-bit value}{ones}
  804. // We can take advantage of LI's sign-extension semantics to generate leading
805. // ones, and then use RLDICL to mask off the ones on the left side (if required)
  806. // after rotation.
  807. //
  808. // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
  809. // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
  810. // +------------------------+ +------------------------+
  811. // 63 0 63 0
  812. // Imm (Imm >> TO) & 0xffff
  813. // +----sext-----|--16-bit--+ +LZ|---------------------+
  814. // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
  815. // +------------------------+ +------------------------+
  816. // 63 0 63 0
  817. // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
  818. if ((LZ + FO + TO) > 48) {
  819. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
  820. getI32Imm((Imm >> TO) & 0xffff));
  821. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  822. getI32Imm(TO), getI32Imm(LZ));
  823. }
  824. // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
825. // If Hi32 is zero and the Lo16 (in Lo32) can be represented as a positive 16-bit
826. // value, we can use LI for the Lo16 without generating leading ones, then OR in
827. // the Hi16 (in Lo32) with ORIS.
  828. if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
  829. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
  830. getI32Imm(Lo32 & 0xffff));
  831. return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
  832. getI32Imm(Lo32 >> 16));
  833. }
  834. // 2-6) Patterns : {******}{49 zeros}{******}
  835. // {******}{49 ones}{******}
  836. // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
  837. // bits remain on both sides. Rotate right the Imm to construct an int<16>
  838. // value, use LI for int<16> value and then use RLDICL without mask to rotate
  839. // it back.
  840. //
  841. // 1) findContiguousZerosAtLeast(Imm, 49)
  842. // +------|--zeros-|------+ +---ones--||---15 bit--+
  843. // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
  844. // +----------------------+ +----------------------+
  845. // 63 0 63 0
  846. //
  847. // 2) findContiguousZerosAtLeast(~Imm, 49)
  848. // +------|--ones--|------+ +---ones--||---15 bit--+
  849. // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
  850. // +----------------------+ +----------------------+
  851. // 63 0 63 0
  852. if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
  853. (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
  854. uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift));
  855. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
  856. getI32Imm(RotImm & 0xffff));
  857. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  858. getI32Imm(Shift), getI32Imm(0));
  859. }
  860. // Following patterns use 3 instructions to materialize the Imm.
  861. InstCnt = 3;
  862. // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
  863. // {zeros}{31-bit value}{zeros}
  864. // {zeros}{ones}{31-bit value}
  865. // {ones}{31-bit value}{zeros}
  866. // We can take advantage of LIS's sign-extension semantics to generate leading
  867. // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
868. // ones on both sides after rotation.
  869. if ((LZ + FO + TZ) > 32) {
  870. uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
  871. unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
  872. Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
  873. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  874. getI32Imm((Imm >> TZ) & 0xffff));
  875. return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
  876. getI32Imm(TZ), getI32Imm(LZ));
  877. }
  878. // 3-2) Pattern : {zeros}{31-bit value}{ones}
879. // Shift right the Imm by (32 - LZ) bits to construct a negative 32-bit value,
  880. // therefore we can take advantage of LIS's sign-extension semantics, add
  881. // the remaining bits with ORI, and then mask them off after rotation.
  882. // This is similar to Pattern 2-3, please refer to the diagram there.
  883. if ((LZ + TO) > 32) {
  884. // Since the immediates with (LZ > 32) have been handled by previous
  885. // patterns, here we have (LZ <= 32) to make sure we will not shift right
  886. // the Imm by a negative value.
  887. assert(LZ <= 32 && "Unexpected shift value.");
  888. Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
  889. getI32Imm((Imm >> (48 - LZ)) & 0xffff));
  890. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  891. getI32Imm((Imm >> (32 - LZ)) & 0xffff));
  892. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  893. getI32Imm(32 - LZ), getI32Imm(LZ));
  894. }
  895. // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
  896. // {ones}{31-bit value}{ones}
  897. // We can take advantage of LIS's sign-extension semantics to generate leading
  898. // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
899. // ones on the left side (if required) after rotation.
  900. // This is similar to Pattern 2-4, please refer to the diagram there.
  901. if ((LZ + FO + TO) > 32) {
  902. Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
  903. getI32Imm((Imm >> (TO + 16)) & 0xffff));
  904. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  905. getI32Imm((Imm >> TO) & 0xffff));
  906. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  907. getI32Imm(TO), getI32Imm(LZ));
  908. }
  909. // 3-4) Patterns : High word == Low word
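// For example, 0x1234'5678'1234'5678 is built as LIS8 0x1234 plus ORI8 0x5678
// to form the low word, and RLDIMI then replicates that word into the high 32
// bits.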
  910. if (Hi32 == Lo32) {
  911. // Handle the first 32 bits.
  912. uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
  913. unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
  914. Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
  915. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  916. getI32Imm(Lo32 & 0xffff));
  917. // Use rldimi to insert the Low word into High word.
  918. SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
  919. getI32Imm(0)};
  920. return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  921. }
  922. // 3-5) Patterns : {******}{33 zeros}{******}
  923. // {******}{33 ones}{******}
  924. // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
  925. // bits remain on both sides. Rotate right the Imm to construct an int<32>
  926. // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
  927. // rotate it back.
  928. // This is similar to Pattern 2-6, please refer to the diagram there.
  929. if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
  930. (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
  931. uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift));
  932. uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
  933. unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
  934. Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
  935. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  936. getI32Imm(RotImm & 0xffff));
  937. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  938. getI32Imm(Shift), getI32Imm(0));
  939. }
  940. InstCnt = 0;
  941. return nullptr;
  942. }
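// Select a 64-bit immediate. Try the 1/2/3-instruction patterns above first;
// if none applies, materialize the upper 32 bits and then OR in the two low
// halfwords with ORIS8/ORI8 as needed.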
  943. static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
  944. unsigned *InstCnt = nullptr) {
  945. unsigned InstCntDirect = 0;
946. // No more than 3 instructions are used if we can select the i64 immediate
  947. // directly.
  948. SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
  949. if (Result) {
  950. if (InstCnt)
  951. *InstCnt = InstCntDirect;
  952. return Result;
  953. }
  954. auto getI32Imm = [CurDAG, dl](unsigned Imm) {
  955. return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  956. };
  957. // Handle the upper 32 bit value.
  958. Result =
  959. selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
  960. // Add in the last bits as required.
  961. if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) {
  962. Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
  963. SDValue(Result, 0), getI32Imm(Hi16));
  964. ++InstCntDirect;
  965. }
  966. if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) {
  967. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  968. getI32Imm(Lo16));
  969. ++InstCntDirect;
  970. }
  971. if (InstCnt)
  972. *InstCnt = InstCntDirect;
  973. return Result;
  974. }
  975. // Select a 64-bit constant.
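// If every user only consumes some low-order bits of the constant, we are free
// to materialize a sign-extended equivalent instead, which may take a single LI8.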
  976. static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
  977. SDLoc dl(N);
  978. // Get 64 bit value.
  979. int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
  980. if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
  981. uint64_t SextImm = SignExtend64(Imm, MinSize);
  982. SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
  983. if (isInt<16>(SextImm))
  984. return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  985. }
  986. return selectI64Imm(CurDAG, dl, Imm);
  987. }
  988. namespace {
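// BitPermutationSelector records, for every bit of the result, which bit of
// which input value it comes from (or whether it must be zero), groups
// consecutive bits that share a source value and rotation amount, and then
// emits a short sequence of rotate-and-mask (and, where profitable, and/or)
// instructions realizing that permutation.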
  989. class BitPermutationSelector {
  990. struct ValueBit {
  991. SDValue V;
  992. // The bit number in the value, using a convention where bit 0 is the
  993. // lowest-order bit.
  994. unsigned Idx;
  995. // ConstZero means a bit we need to mask off.
996. // Variable is a bit that comes from an input variable.
997. // VariableKnownToBeZero is also a bit that comes from an input variable,
998. // but it is already known to be zero, so we do not need to mask it.
  999. enum Kind {
  1000. ConstZero,
  1001. Variable,
  1002. VariableKnownToBeZero
  1003. } K;
  1004. ValueBit(SDValue V, unsigned I, Kind K = Variable)
  1005. : V(V), Idx(I), K(K) {}
  1006. ValueBit(Kind K = Variable)
  1007. : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
  1008. bool isZero() const {
  1009. return K == ConstZero || K == VariableKnownToBeZero;
  1010. }
  1011. bool hasValue() const {
  1012. return K == Variable || K == VariableKnownToBeZero;
  1013. }
  1014. SDValue getValue() const {
  1015. assert(hasValue() && "Cannot get the value of a constant bit");
  1016. return V;
  1017. }
  1018. unsigned getValueBitIndex() const {
  1019. assert(hasValue() && "Cannot get the value bit index of a constant bit");
  1020. return Idx;
  1021. }
  1022. };
  1023. // A bit group has the same underlying value and the same rotate factor.
  1024. struct BitGroup {
  1025. SDValue V;
  1026. unsigned RLAmt;
  1027. unsigned StartIdx, EndIdx;
  1028. // This rotation amount assumes that the lower 32 bits of the quantity are
  1029. // replicated in the high 32 bits by the rotation operator (which is done
  1030. // by rlwinm and friends in 64-bit mode).
  1031. bool Repl32;
  1032. // Did converting to Repl32 == true change the rotation factor? If it did,
  1033. // it decreased it by 32.
  1034. bool Repl32CR;
  1035. // Was this group coalesced after setting Repl32 to true?
  1036. bool Repl32Coalesced;
  1037. BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
  1038. : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
  1039. Repl32Coalesced(false) {
  1040. LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
  1041. << " [" << S << ", " << E << "]\n");
  1042. }
  1043. };
  1044. // Information on each (Value, RLAmt) pair (like the number of groups
  1045. // associated with each) used to choose the lowering method.
  1046. struct ValueRotInfo {
  1047. SDValue V;
  1048. unsigned RLAmt = std::numeric_limits<unsigned>::max();
  1049. unsigned NumGroups = 0;
  1050. unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
  1051. bool Repl32 = false;
  1052. ValueRotInfo() = default;
  1053. // For sorting (in reverse order) by NumGroups, and then by
  1054. // FirstGroupStartIdx.
  1055. bool operator < (const ValueRotInfo &Other) const {
  1056. // We need to sort so that the non-Repl32 come first because, when we're
  1057. // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
  1058. // masking operation.
  1059. if (Repl32 < Other.Repl32)
  1060. return true;
  1061. else if (Repl32 > Other.Repl32)
  1062. return false;
  1063. else if (NumGroups > Other.NumGroups)
  1064. return true;
  1065. else if (NumGroups < Other.NumGroups)
  1066. return false;
  1067. else if (RLAmt == 0 && Other.RLAmt != 0)
  1068. return true;
  1069. else if (RLAmt != 0 && Other.RLAmt == 0)
  1070. return false;
  1071. else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
  1072. return true;
  1073. return false;
  1074. }
  1075. };
  1076. using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
  1077. using ValueBitsMemoizer =
  1078. DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
  1079. ValueBitsMemoizer Memoizer;
  1080. // Return a pair of bool and a SmallVector pointer to a memoization entry.
1081. // The bool is true if something interesting was deduced; otherwise we're
  1082. // providing only a generic representation of V (or something else likewise
  1083. // uninteresting for instruction selection) through the SmallVector.
  1084. std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
  1085. unsigned NumBits) {
  1086. auto &ValueEntry = Memoizer[V];
  1087. if (ValueEntry)
  1088. return std::make_pair(ValueEntry->first, &ValueEntry->second);
  1089. ValueEntry.reset(new ValueBitsMemoizedValue());
  1090. bool &Interesting = ValueEntry->first;
  1091. SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
  1092. Bits.resize(NumBits);
  1093. switch (V.getOpcode()) {
  1094. default: break;
  1095. case ISD::ROTL:
  1096. if (isa<ConstantSDNode>(V.getOperand(1))) {
  1097. unsigned RotAmt = V.getConstantOperandVal(1);
  1098. const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
  1099. for (unsigned i = 0; i < NumBits; ++i)
  1100. Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
  1101. return std::make_pair(Interesting = true, &Bits);
  1102. }
  1103. break;
  1104. case ISD::SHL:
  1105. case PPCISD::SHL:
  1106. if (isa<ConstantSDNode>(V.getOperand(1))) {
  1107. unsigned ShiftAmt = V.getConstantOperandVal(1);
  1108. const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
  1109. for (unsigned i = ShiftAmt; i < NumBits; ++i)
  1110. Bits[i] = LHSBits[i - ShiftAmt];
  1111. for (unsigned i = 0; i < ShiftAmt; ++i)
  1112. Bits[i] = ValueBit(ValueBit::ConstZero);
  1113. return std::make_pair(Interesting = true, &Bits);
  1114. }
  1115. break;
  1116. case ISD::SRL:
  1117. case PPCISD::SRL:
  1118. if (isa<ConstantSDNode>(V.getOperand(1))) {
  1119. unsigned ShiftAmt = V.getConstantOperandVal(1);
  1120. const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
  1121. for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
  1122. Bits[i] = LHSBits[i + ShiftAmt];
  1123. for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
  1124. Bits[i] = ValueBit(ValueBit::ConstZero);
  1125. return std::make_pair(Interesting = true, &Bits);
  1126. }
  1127. break;
  1128. case ISD::AND:
  1129. if (isa<ConstantSDNode>(V.getOperand(1))) {
  1130. uint64_t Mask = V.getConstantOperandVal(1);
  1131. const SmallVector<ValueBit, 64> *LHSBits;
1132. // Mark this as interesting only if the LHS was also interesting. This
  1133. // prevents the overall procedure from matching a single immediate 'and'
  1134. // (which is non-optimal because such an and might be folded with other
  1135. // things if we don't select it here).
  1136. std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
  1137. for (unsigned i = 0; i < NumBits; ++i)
  1138. if (((Mask >> i) & 1) == 1)
  1139. Bits[i] = (*LHSBits)[i];
  1140. else {
  1141. // AND instruction masks this bit. If the input is already zero,
  1142. // we have nothing to do here. Otherwise, make the bit ConstZero.
  1143. if ((*LHSBits)[i].isZero())
  1144. Bits[i] = (*LHSBits)[i];
  1145. else
  1146. Bits[i] = ValueBit(ValueBit::ConstZero);
  1147. }
  1148. return std::make_pair(Interesting, &Bits);
  1149. }
  1150. break;
  1151. case ISD::OR: {
  1152. const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
  1153. const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
  1154. bool AllDisjoint = true;
  1155. SDValue LastVal = SDValue();
  1156. unsigned LastIdx = 0;
  1157. for (unsigned i = 0; i < NumBits; ++i) {
  1158. if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
  1159. // If both inputs are known to be zero and one is ConstZero and
1160. // the other is VariableKnownToBeZero, we can select whichever
  1161. // we like. To minimize the number of bit groups, we select
  1162. // VariableKnownToBeZero if this bit is the next bit of the same
  1163. // input variable from the previous bit. Otherwise, we select
  1164. // ConstZero.
  1165. if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
  1166. LHSBits[i].getValueBitIndex() == LastIdx + 1)
  1167. Bits[i] = LHSBits[i];
  1168. else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
  1169. RHSBits[i].getValueBitIndex() == LastIdx + 1)
  1170. Bits[i] = RHSBits[i];
  1171. else
  1172. Bits[i] = ValueBit(ValueBit::ConstZero);
  1173. }
  1174. else if (LHSBits[i].isZero())
  1175. Bits[i] = RHSBits[i];
  1176. else if (RHSBits[i].isZero())
  1177. Bits[i] = LHSBits[i];
  1178. else {
  1179. AllDisjoint = false;
  1180. break;
  1181. }
  1182. // We remember the value and bit index of this bit.
  1183. if (Bits[i].hasValue()) {
  1184. LastVal = Bits[i].getValue();
  1185. LastIdx = Bits[i].getValueBitIndex();
  1186. }
  1187. else {
  1188. if (LastVal) LastVal = SDValue();
  1189. LastIdx = 0;
  1190. }
  1191. }
  1192. if (!AllDisjoint)
  1193. break;
  1194. return std::make_pair(Interesting = true, &Bits);
  1195. }
  1196. case ISD::ZERO_EXTEND: {
  1197. // We support only the case with zero extension from i32 to i64 so far.
  1198. if (V.getValueType() != MVT::i64 ||
  1199. V.getOperand(0).getValueType() != MVT::i32)
  1200. break;
  1201. const SmallVector<ValueBit, 64> *LHSBits;
  1202. const unsigned NumOperandBits = 32;
  1203. std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
  1204. NumOperandBits);
  1205. for (unsigned i = 0; i < NumOperandBits; ++i)
  1206. Bits[i] = (*LHSBits)[i];
  1207. for (unsigned i = NumOperandBits; i < NumBits; ++i)
  1208. Bits[i] = ValueBit(ValueBit::ConstZero);
  1209. return std::make_pair(Interesting, &Bits);
  1210. }
  1211. case ISD::TRUNCATE: {
  1212. EVT FromType = V.getOperand(0).getValueType();
  1213. EVT ToType = V.getValueType();
  1214. // We support only the case with truncate from i64 to i32.
  1215. if (FromType != MVT::i64 || ToType != MVT::i32)
  1216. break;
  1217. const unsigned NumAllBits = FromType.getSizeInBits();
  1218. SmallVector<ValueBit, 64> *InBits;
  1219. std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
  1220. NumAllBits);
  1221. const unsigned NumValidBits = ToType.getSizeInBits();
1222. // A 32-bit instruction cannot touch the upper 32-bit part of a 64-bit value.
  1223. // So, we cannot include this truncate.
  1224. bool UseUpper32bit = false;
  1225. for (unsigned i = 0; i < NumValidBits; ++i)
  1226. if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
  1227. UseUpper32bit = true;
  1228. break;
  1229. }
  1230. if (UseUpper32bit)
  1231. break;
  1232. for (unsigned i = 0; i < NumValidBits; ++i)
  1233. Bits[i] = (*InBits)[i];
  1234. return std::make_pair(Interesting, &Bits);
  1235. }
  1236. case ISD::AssertZext: {
  1237. // For AssertZext, we look through the operand and
  1238. // mark the bits known to be zero.
  1239. const SmallVector<ValueBit, 64> *LHSBits;
  1240. std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
  1241. NumBits);
  1242. EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
  1243. const unsigned NumValidBits = FromType.getSizeInBits();
  1244. for (unsigned i = 0; i < NumValidBits; ++i)
  1245. Bits[i] = (*LHSBits)[i];
  1246. // These bits are known to be zero but the AssertZext may be from a value
  1247. // that already has some constant zero bits (i.e. from a masking and).
  1248. for (unsigned i = NumValidBits; i < NumBits; ++i)
  1249. Bits[i] = (*LHSBits)[i].hasValue()
  1250. ? ValueBit((*LHSBits)[i].getValue(),
  1251. (*LHSBits)[i].getValueBitIndex(),
  1252. ValueBit::VariableKnownToBeZero)
  1253. : ValueBit(ValueBit::ConstZero);
  1254. return std::make_pair(Interesting, &Bits);
  1255. }
  1256. case ISD::LOAD:
  1257. LoadSDNode *LD = cast<LoadSDNode>(V);
  1258. if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
  1259. EVT VT = LD->getMemoryVT();
  1260. const unsigned NumValidBits = VT.getSizeInBits();
  1261. for (unsigned i = 0; i < NumValidBits; ++i)
  1262. Bits[i] = ValueBit(V, i);
  1263. // These bits are known to be zero.
  1264. for (unsigned i = NumValidBits; i < NumBits; ++i)
  1265. Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1266. // A zero-extending load itself cannot be optimized, so it is not
1267. // interesting by itself, though it gives useful information.
  1268. return std::make_pair(Interesting = false, &Bits);
  1269. }
  1270. break;
  1271. }
  1272. for (unsigned i = 0; i < NumBits; ++i)
  1273. Bits[i] = ValueBit(V, i);
  1274. return std::make_pair(Interesting = false, &Bits);
  1275. }
  1276. // For each value (except the constant ones), compute the left-rotate amount
  1277. // to get it from its original to final position.
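// For example, if bit 5 of the result is bit 3 of V, the left-rotate amount is
// 2; if bit 3 of the result is bit 5 of V, it is Bits.size() - 2.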
  1278. void computeRotationAmounts() {
  1279. NeedMask = false;
  1280. RLAmt.resize(Bits.size());
  1281. for (unsigned i = 0; i < Bits.size(); ++i)
  1282. if (Bits[i].hasValue()) {
  1283. unsigned VBI = Bits[i].getValueBitIndex();
  1284. if (i >= VBI)
  1285. RLAmt[i] = i - VBI;
  1286. else
  1287. RLAmt[i] = Bits.size() - (VBI - i);
  1288. } else if (Bits[i].isZero()) {
  1289. NeedMask = true;
  1290. RLAmt[i] = UINT32_MAX;
  1291. } else {
  1292. llvm_unreachable("Unknown value bit type");
  1293. }
  1294. }
  1295. // Collect groups of consecutive bits with the same underlying value and
  1296. // rotation factor. If we're doing late masking, we ignore zeros, otherwise
  1297. // they break up groups.
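// For example, if bits [0, 7] of the result all come from V with a rotation
// amount of 4, they form the single group BitGroup(V, 4, 0, 7).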
  1298. void collectBitGroups(bool LateMask) {
  1299. BitGroups.clear();
  1300. unsigned LastRLAmt = RLAmt[0];
  1301. SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
  1302. unsigned LastGroupStartIdx = 0;
  1303. bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
  1304. for (unsigned i = 1; i < Bits.size(); ++i) {
  1305. unsigned ThisRLAmt = RLAmt[i];
  1306. SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
  1307. if (LateMask && !ThisValue) {
  1308. ThisValue = LastValue;
  1309. ThisRLAmt = LastRLAmt;
  1310. // If we're doing late masking, then the first bit group always starts
  1311. // at zero (even if the first bits were zero).
  1312. if (BitGroups.empty())
  1313. LastGroupStartIdx = 0;
  1314. }
  1315. // If this bit is known to be zero and the current group is a bit group
1316. // of zeros, we do not need to terminate the current bit group even if the
  1317. // Value or RLAmt does not match here. Instead, we terminate this group
  1318. // when the first non-zero bit appears later.
  1319. if (IsGroupOfZeros && Bits[i].isZero())
  1320. continue;
  1321. // If this bit has the same underlying value and the same rotate factor as
  1322. // the last one, then they're part of the same group.
  1323. if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1324. // We cannot continue the current group if this bit is not known to
  1325. // be zero in a bit group of zeros.
  1326. if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
  1327. continue;
  1328. if (LastValue.getNode())
  1329. BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
  1330. i-1));
  1331. LastRLAmt = ThisRLAmt;
  1332. LastValue = ThisValue;
  1333. LastGroupStartIdx = i;
  1334. IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
  1335. }
  1336. if (LastValue.getNode())
  1337. BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
  1338. Bits.size()-1));
  1339. if (BitGroups.empty())
  1340. return;
  1341. // We might be able to combine the first and last groups.
  1342. if (BitGroups.size() > 1) {
  1343. // If the first and last groups are the same, then remove the first group
  1344. // in favor of the last group, making the ending index of the last group
  1345. // equal to the ending index of the to-be-removed first group.
  1346. if (BitGroups[0].StartIdx == 0 &&
  1347. BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
  1348. BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
  1349. BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
  1350. LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
  1351. BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
  1352. BitGroups.erase(BitGroups.begin());
  1353. }
  1354. }
  1355. }
  1356. // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1357. // associated with each. If the number of groups is the same, we prefer a group
1358. // that does not require a rotate, i.e. RLAmt is 0, to avoid the first rotate
  1359. // instruction. If there is a degeneracy, pick the one that occurs
  1360. // first (in the final value).
  1361. void collectValueRotInfo() {
  1362. ValueRots.clear();
  1363. for (auto &BG : BitGroups) {
  1364. unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
  1365. ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
  1366. VRI.V = BG.V;
  1367. VRI.RLAmt = BG.RLAmt;
  1368. VRI.Repl32 = BG.Repl32;
  1369. VRI.NumGroups += 1;
  1370. VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
  1371. }
  1372. // Now that we've collected the various ValueRotInfo instances, we need to
  1373. // sort them.
  1374. ValueRotsVec.clear();
  1375. for (auto &I : ValueRots) {
  1376. ValueRotsVec.push_back(I.second);
  1377. }
  1378. llvm::sort(ValueRotsVec);
  1379. }
  1380. // In 64-bit mode, rlwinm and friends have a rotation operator that
  1381. // replicates the low-order 32 bits into the high-order 32-bits. The mask
  1382. // indices of these instructions can only be in the lower 32 bits, so they
  1383. // can only represent some 64-bit bit groups. However, when they can be used,
  1384. // the 32-bit replication can be used to represent, as a single bit group,
  1385. // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1386. // groups when possible.
  1388. void assignRepl32BitGroups() {
  1389. // If we have bits like this:
  1390. //
  1391. // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
  1392. // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
  1393. // Groups: | RLAmt = 8 | RLAmt = 40 |
  1394. //
  1395. // But, making use of a 32-bit operation that replicates the low-order 32
  1396. // bits into the high-order 32 bits, this can be one bit group with a RLAmt
  1397. // of 8.
  1398. auto IsAllLow32 = [this](BitGroup & BG) {
  1399. if (BG.StartIdx <= BG.EndIdx) {
  1400. for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
  1401. if (!Bits[i].hasValue())
  1402. continue;
  1403. if (Bits[i].getValueBitIndex() >= 32)
  1404. return false;
  1405. }
  1406. } else {
  1407. for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
  1408. if (!Bits[i].hasValue())
  1409. continue;
  1410. if (Bits[i].getValueBitIndex() >= 32)
  1411. return false;
  1412. }
  1413. for (unsigned i = 0; i <= BG.EndIdx; ++i) {
  1414. if (!Bits[i].hasValue())
  1415. continue;
  1416. if (Bits[i].getValueBitIndex() >= 32)
  1417. return false;
  1418. }
  1419. }
  1420. return true;
  1421. };
  1422. for (auto &BG : BitGroups) {
  1423. // If this bit group has RLAmt of 0 and will not be merged with
  1424. // another bit group, we don't benefit from Repl32. We don't mark
1425. // such a group, to give more freedom to later instruction selection.
  1426. if (BG.RLAmt == 0) {
  1427. auto PotentiallyMerged = [this](BitGroup & BG) {
  1428. for (auto &BG2 : BitGroups)
  1429. if (&BG != &BG2 && BG.V == BG2.V &&
  1430. (BG2.RLAmt == 0 || BG2.RLAmt == 32))
  1431. return true;
  1432. return false;
  1433. };
  1434. if (!PotentiallyMerged(BG))
  1435. continue;
  1436. }
  1437. if (BG.StartIdx < 32 && BG.EndIdx < 32) {
  1438. if (IsAllLow32(BG)) {
  1439. if (BG.RLAmt >= 32) {
  1440. BG.RLAmt -= 32;
  1441. BG.Repl32CR = true;
  1442. }
  1443. BG.Repl32 = true;
  1444. LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
  1445. << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
  1446. << BG.StartIdx << ", " << BG.EndIdx << "]\n");
  1447. }
  1448. }
  1449. }
  1450. // Now walk through the bit groups, consolidating where possible.
  1451. for (auto I = BitGroups.begin(); I != BitGroups.end();) {
  1452. // We might want to remove this bit group by merging it with the previous
  1453. // group (which might be the ending group).
  1454. auto IP = (I == BitGroups.begin()) ?
  1455. std::prev(BitGroups.end()) : std::prev(I);
  1456. if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
  1457. I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
  1458. LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
  1459. << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
  1460. << I->StartIdx << ", " << I->EndIdx
  1461. << "] with group with range [" << IP->StartIdx << ", "
  1462. << IP->EndIdx << "]\n");
  1463. IP->EndIdx = I->EndIdx;
  1464. IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
  1465. IP->Repl32Coalesced = true;
  1466. I = BitGroups.erase(I);
  1467. continue;
  1468. } else {
  1469. // There is a special case worth handling: If there is a single group
  1470. // covering the entire upper 32 bits, and it can be merged with both
  1471. // the next and previous groups (which might be the same group), then
  1472. // do so. If it is the same group (so there will be only one group in
  1473. // total), then we need to reverse the order of the range so that it
  1474. // covers the entire 64 bits.
  1475. if (I->StartIdx == 32 && I->EndIdx == 63) {
  1476. assert(std::next(I) == BitGroups.end() &&
  1477. "bit group ends at index 63 but there is another?");
  1478. auto IN = BitGroups.begin();
  1479. if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
  1480. (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
  1481. IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
  1482. IsAllLow32(*I)) {
  1483. LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
  1484. << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
  1485. << ", " << I->EndIdx
  1486. << "] with 32-bit replicated groups with ranges ["
  1487. << IP->StartIdx << ", " << IP->EndIdx << "] and ["
  1488. << IN->StartIdx << ", " << IN->EndIdx << "]\n");
  1489. if (IP == IN) {
  1490. // There is only one other group; change it to cover the whole
  1491. // range (backward, so that it can still be Repl32 but cover the
  1492. // whole 64-bit range).
  1493. IP->StartIdx = 31;
  1494. IP->EndIdx = 30;
  1495. IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
  1496. IP->Repl32Coalesced = true;
  1497. I = BitGroups.erase(I);
  1498. } else {
  1499. // There are two separate groups, one before this group and one
  1500. // after us (at the beginning). We're going to remove this group,
  1501. // but also the group at the very beginning.
  1502. IP->EndIdx = IN->EndIdx;
  1503. IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
  1504. IP->Repl32Coalesced = true;
  1505. I = BitGroups.erase(I);
  1506. BitGroups.erase(BitGroups.begin());
  1507. }
  1508. // This must be the last group in the vector (and we might have
  1509. // just invalidated the iterator above), so break here.
  1510. break;
  1511. }
  1512. }
  1513. }
  1514. ++I;
  1515. }
  1516. }
  1517. SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  1518. return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  1519. }
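// Return the AND mask implied by the constant-zero bits: ones in positions
// that carry a value bit, zeros in positions that must be cleared.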
  1520. uint64_t getZerosMask() {
  1521. uint64_t Mask = 0;
  1522. for (unsigned i = 0; i < Bits.size(); ++i) {
  1523. if (Bits[i].hasValue())
  1524. continue;
  1525. Mask |= (UINT64_C(1) << i);
  1526. }
  1527. return ~Mask;
  1528. }
1529. // This method extends an input value to 64 bits if the input is a 32-bit integer.
  1530. // While selecting instructions in BitPermutationSelector in 64-bit mode,
  1531. // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1532. // In such a case, we extend it to 64 bits to be consistent with other values.
  1533. SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
  1534. if (V.getValueSizeInBits() == 64)
  1535. return V;
  1536. assert(V.getValueSizeInBits() == 32);
  1537. SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  1538. SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
  1539. MVT::i64), 0);
  1540. SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
  1541. MVT::i64, ImDef, V,
  1542. SubRegIdx), 0);
  1543. return ExtVal;
  1544. }
  1545. SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
  1546. if (V.getValueSizeInBits() == 32)
  1547. return V;
  1548. assert(V.getValueSizeInBits() == 64);
  1549. SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  1550. SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
  1551. MVT::i32, V, SubRegIdx), 0);
  1552. return SubVal;
  1553. }
  1554. // Depending on the number of groups for a particular value, it might be
  1555. // better to rotate, mask explicitly (using andi/andis), and then or the
  1556. // result. Select this part of the result first.
  1557. void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
  1558. if (BPermRewriterNoMasking)
  1559. return;
  1560. for (ValueRotInfo &VRI : ValueRotsVec) {
  1561. unsigned Mask = 0;
  1562. for (unsigned i = 0; i < Bits.size(); ++i) {
  1563. if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
  1564. continue;
  1565. if (RLAmt[i] != VRI.RLAmt)
  1566. continue;
  1567. Mask |= (1u << i);
  1568. }
  1569. // Compute the masks for andi/andis that would be necessary.
  1570. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
  1571. assert((ANDIMask != 0 || ANDISMask != 0) &&
  1572. "No set bits in mask for value bit groups");
  1573. bool NeedsRotate = VRI.RLAmt != 0;
  1574. // We're trying to minimize the number of instructions. If we have one
  1575. // group, using one of andi/andis can break even. If we have three
  1576. // groups, we can use both andi and andis and break even (to use both
  1577. // andi and andis we also need to or the results together). We need four
  1578. // groups if we also need to rotate. To use andi/andis we need to do more
  1579. // than break even because rotate-and-mask instructions tend to be easier
  1580. // to schedule.
  1581. // FIXME: We've biased here against using andi/andis, which is right for
  1582. // POWER cores, but not optimal everywhere. For example, on the A2,
  1583. // andi/andis have single-cycle latency whereas the rotate-and-mask
  1584. // instructions take two cycles, and it would be better to bias toward
  1585. // andi/andis in break-even cases.
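// For example, with a single group that needs a rotate and both an andi and
// an andis (plus the or to combine them), masking costs 4 instructions versus
// 1 rotate-and-mask, so the rotate form wins below.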
  1586. unsigned NumAndInsts = (unsigned) NeedsRotate +
  1587. (unsigned) (ANDIMask != 0) +
  1588. (unsigned) (ANDISMask != 0) +
  1589. (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
  1590. (unsigned) (bool) Res;
  1591. LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
  1592. << " RL: " << VRI.RLAmt << ":"
  1593. << "\n\t\t\tisel using masking: " << NumAndInsts
  1594. << " using rotates: " << VRI.NumGroups << "\n");
  1595. if (NumAndInsts >= VRI.NumGroups)
  1596. continue;
  1597. LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
  1598. if (InstCnt) *InstCnt += NumAndInsts;
  1599. SDValue VRot;
  1600. if (VRI.RLAmt) {
  1601. SDValue Ops[] =
  1602. { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
  1603. getI32Imm(0, dl), getI32Imm(31, dl) };
  1604. VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  1605. Ops), 0);
  1606. } else {
  1607. VRot = TruncateToInt32(VRI.V, dl);
  1608. }
  1609. SDValue ANDIVal, ANDISVal;
  1610. if (ANDIMask != 0)
  1611. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
  1612. VRot, getI32Imm(ANDIMask, dl)),
  1613. 0);
  1614. if (ANDISMask != 0)
  1615. ANDISVal =
  1616. SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
  1617. getI32Imm(ANDISMask, dl)),
  1618. 0);
  1619. SDValue TotalVal;
  1620. if (!ANDIVal)
  1621. TotalVal = ANDISVal;
  1622. else if (!ANDISVal)
  1623. TotalVal = ANDIVal;
  1624. else
  1625. TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
  1626. ANDIVal, ANDISVal), 0);
  1627. if (!Res)
  1628. Res = TotalVal;
  1629. else
  1630. Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
  1631. Res, TotalVal), 0);
  1632. // Now, remove all groups with this underlying value and rotation
  1633. // factor.
  1634. eraseMatchingBitGroups([VRI](const BitGroup &BG) {
  1635. return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
  1636. });
  1637. }
  1638. }
  1639. // Instruction selection for the 32-bit case.
  1640. SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
  1641. SDLoc dl(N);
  1642. SDValue Res;
  1643. if (InstCnt) *InstCnt = 0;
  1644. // Take care of cases that should use andi/andis first.
  1645. SelectAndParts32(dl, Res, InstCnt);
  1646. // If we've not yet selected a 'starting' instruction, and we have no zeros
  1647. // to fill in, select the (Value, RLAmt) with the highest priority (largest
  1648. // number of groups), and start with this rotated value.
  1649. if ((!NeedMask || LateMask) && !Res) {
  1650. ValueRotInfo &VRI = ValueRotsVec[0];
  1651. if (VRI.RLAmt) {
  1652. if (InstCnt) *InstCnt += 1;
  1653. SDValue Ops[] =
  1654. { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
  1655. getI32Imm(0, dl), getI32Imm(31, dl) };
  1656. Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
  1657. 0);
  1658. } else {
  1659. Res = TruncateToInt32(VRI.V, dl);
  1660. }
  1661. // Now, remove all groups with this underlying value and rotation factor.
  1662. eraseMatchingBitGroups([VRI](const BitGroup &BG) {
  1663. return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
  1664. });
  1665. }
  1666. if (InstCnt) *InstCnt += BitGroups.size();
  1667. // Insert the other groups (one at a time).
  1668. for (auto &BG : BitGroups) {
  1669. if (!Res) {
  1670. SDValue Ops[] =
  1671. { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
  1672. getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
  1673. getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
  1674. Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  1675. } else {
  1676. SDValue Ops[] =
  1677. { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
  1678. getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
  1679. getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
  1680. Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
  1681. }
  1682. }
  1683. if (LateMask) {
  1684. unsigned Mask = (unsigned) getZerosMask();
  1685. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
  1686. assert((ANDIMask != 0 || ANDISMask != 0) &&
  1687. "No set bits in zeros mask?");
  1688. if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
  1689. (unsigned) (ANDISMask != 0) +
  1690. (unsigned) (ANDIMask != 0 && ANDISMask != 0);
  1691. SDValue ANDIVal, ANDISVal;
  1692. if (ANDIMask != 0)
  1693. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
  1694. Res, getI32Imm(ANDIMask, dl)),
  1695. 0);
  1696. if (ANDISMask != 0)
  1697. ANDISVal =
  1698. SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
  1699. getI32Imm(ANDISMask, dl)),
  1700. 0);
  1701. if (!ANDIVal)
  1702. Res = ANDISVal;
  1703. else if (!ANDISVal)
  1704. Res = ANDIVal;
  1705. else
  1706. Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
  1707. ANDIVal, ANDISVal), 0);
  1708. }
  1709. return Res.getNode();
  1710. }
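// Return how many instructions SelectRotMask64 (or SelectRotMaskIns64 when
// IsIns is true) will need for this rotate/mask combination: 1 if a single
// rotate-and-mask instruction suffices, 2 if an extra rotate is required first.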
  1711. unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
  1712. unsigned MaskStart, unsigned MaskEnd,
  1713. bool IsIns) {
  1714. // In the notation used by the instructions, 'start' and 'end' are reversed
  1715. // because bits are counted from high to low order.
  1716. unsigned InstMaskStart = 64 - MaskEnd - 1,
  1717. InstMaskEnd = 64 - MaskStart - 1;
  1718. if (Repl32)
  1719. return 1;
  1720. if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
  1721. InstMaskEnd == 63 - RLAmt)
  1722. return 1;
  1723. return 2;
  1724. }
  1725. // For 64-bit values, not all combinations of rotates and masks are
  1726. // available. Produce one if it is available.
  1727. SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
  1728. bool Repl32, unsigned MaskStart, unsigned MaskEnd,
  1729. unsigned *InstCnt = nullptr) {
  1730. // In the notation used by the instructions, 'start' and 'end' are reversed
  1731. // because bits are counted from high to low order.
  1732. unsigned InstMaskStart = 64 - MaskEnd - 1,
  1733. InstMaskEnd = 64 - MaskStart - 1;
  1734. if (InstCnt) *InstCnt += 1;
  1735. if (Repl32) {
  1736. // This rotation amount assumes that the lower 32 bits of the quantity
  1737. // are replicated in the high 32 bits by the rotation operator (which is
  1738. // done by rlwinm and friends).
  1739. assert(InstMaskStart >= 32 && "Mask cannot start out of range");
  1740. assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
  1741. SDValue Ops[] =
  1742. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1743. getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
  1744. return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
  1745. Ops), 0);
  1746. }
  1747. if (InstMaskEnd == 63) {
  1748. SDValue Ops[] =
  1749. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1750. getI32Imm(InstMaskStart, dl) };
  1751. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
  1752. }
  1753. if (InstMaskStart == 0) {
  1754. SDValue Ops[] =
  1755. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1756. getI32Imm(InstMaskEnd, dl) };
  1757. return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
  1758. }
  1759. if (InstMaskEnd == 63 - RLAmt) {
  1760. SDValue Ops[] =
  1761. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1762. getI32Imm(InstMaskStart, dl) };
  1763. return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
  1764. }
  1765. // We cannot do this with a single instruction, so we'll use two. The
  1766. // problem is that we're not free to choose both a rotation amount and mask
  1767. // start and end independently. We can choose an arbitrary mask start and
  1768. // end, but then the rotation amount is fixed. Rotation, however, can be
  1769. // inverted, and so by applying an "inverse" rotation first, we can get the
  1770. // desired result.
  1771. if (InstCnt) *InstCnt += 1;
  1772. // The rotation mask for the second instruction must be MaskStart.
  1773. unsigned RLAmt2 = MaskStart;
  1774. // The first instruction must rotate V so that the overall rotation amount
  1775. // is RLAmt.
  1776. unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
  1777. if (RLAmt1)
  1778. V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
  1779. return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
  1780. }
  1781. // For 64-bit values, not all combinations of rotates and masks are
  1782. // available. Produce a rotate-mask-and-insert if one is available.
  1783. SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
  1784. unsigned RLAmt, bool Repl32, unsigned MaskStart,
  1785. unsigned MaskEnd, unsigned *InstCnt = nullptr) {
  1786. // In the notation used by the instructions, 'start' and 'end' are reversed
  1787. // because bits are counted from high to low order.
  1788. unsigned InstMaskStart = 64 - MaskEnd - 1,
  1789. InstMaskEnd = 64 - MaskStart - 1;
  1790. if (InstCnt) *InstCnt += 1;
  1791. if (Repl32) {
  1792. // This rotation amount assumes that the lower 32 bits of the quantity
  1793. // are replicated in the high 32 bits by the rotation operator (which is
  1794. // done by rlwinm and friends).
  1795. assert(InstMaskStart >= 32 && "Mask cannot start out of range");
  1796. assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
  1797. SDValue Ops[] =
  1798. { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1799. getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
  1800. return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
  1801. Ops), 0);
  1802. }
  1803. if (InstMaskEnd == 63 - RLAmt) {
  1804. SDValue Ops[] =
  1805. { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1806. getI32Imm(InstMaskStart, dl) };
  1807. return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
  1808. }
  1809. // We cannot do this with a single instruction, so we'll use two. The
  1810. // problem is that we're not free to choose both a rotation amount and mask
  1811. // start and end independently. We can choose an arbitrary mask start and
  1812. // end, but then the rotation amount is fixed. Rotation, however, can be
  1813. // inverted, and so by applying an "inverse" rotation first, we can get the
  1814. // desired result.
  1815. if (InstCnt) *InstCnt += 1;
  1816. // The rotation mask for the second instruction must be MaskStart.
  1817. unsigned RLAmt2 = MaskStart;
  1818. // The first instruction must rotate V so that the overall rotation amount
  1819. // is RLAmt.
  1820. unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
  1821. if (RLAmt1)
  1822. V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
  1823. return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
  1824. }
  1825. void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
  1826. if (BPermRewriterNoMasking)
  1827. return;
  1828. // The idea here is the same as in the 32-bit version, but with additional
  1829. // complications from the fact that Repl32 might be true. Because we
  1830. // aggressively convert bit groups to Repl32 form (which, for small
  1831. // rotation factors, involves no other change), and then coalesce, it might
  1832. // be the case that a single 64-bit masking operation could handle both
  1833. // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
1834. // form allowed coalescing, then we must use a 32-bit rotation in order to
  1835. // completely capture the new combined bit group.
  1836. for (ValueRotInfo &VRI : ValueRotsVec) {
  1837. uint64_t Mask = 0;
  1838. // We need to add to the mask all bits from the associated bit groups.
  1839. // If Repl32 is false, we need to add bits from bit groups that have
1840. // Repl32 true, but are trivially convertible to Repl32 false. Such a
1841. // group is trivially convertible if it overlaps only with the lower 32
  1842. // bits, and the group has not been coalesced.
  1843. auto MatchingBG = [VRI](const BitGroup &BG) {
  1844. if (VRI.V != BG.V)
  1845. return false;
  1846. unsigned EffRLAmt = BG.RLAmt;
  1847. if (!VRI.Repl32 && BG.Repl32) {
  1848. if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
  1849. !BG.Repl32Coalesced) {
  1850. if (BG.Repl32CR)
  1851. EffRLAmt += 32;
  1852. } else {
  1853. return false;
  1854. }
  1855. } else if (VRI.Repl32 != BG.Repl32) {
  1856. return false;
  1857. }
  1858. return VRI.RLAmt == EffRLAmt;
  1859. };
  1860. for (auto &BG : BitGroups) {
  1861. if (!MatchingBG(BG))
  1862. continue;
  1863. if (BG.StartIdx <= BG.EndIdx) {
  1864. for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
  1865. Mask |= (UINT64_C(1) << i);
  1866. } else {
  1867. for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
  1868. Mask |= (UINT64_C(1) << i);
  1869. for (unsigned i = 0; i <= BG.EndIdx; ++i)
  1870. Mask |= (UINT64_C(1) << i);
  1871. }
  1872. }
  1873. // We can use the 32-bit andi/andis technique if the mask does not
  1874. // require any higher-order bits. This can save an instruction compared
  1875. // to always using the general 64-bit technique.
  1876. bool Use32BitInsts = isUInt<32>(Mask);
  1877. // Compute the masks for andi/andis that would be necessary.
  1878. unsigned ANDIMask = (Mask & UINT16_MAX),
  1879. ANDISMask = (Mask >> 16) & UINT16_MAX;
  1880. bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
  1881. unsigned NumAndInsts = (unsigned) NeedsRotate +
  1882. (unsigned) (bool) Res;
  1883. unsigned NumOfSelectInsts = 0;
  1884. selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
  1885. assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
  1886. if (Use32BitInsts)
  1887. NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
  1888. (unsigned) (ANDIMask != 0 && ANDISMask != 0);
  1889. else
  1890. NumAndInsts += NumOfSelectInsts + /* and */ 1;
  1891. unsigned NumRLInsts = 0;
  1892. bool FirstBG = true;
  1893. bool MoreBG = false;
  1894. for (auto &BG : BitGroups) {
  1895. if (!MatchingBG(BG)) {
  1896. MoreBG = true;
  1897. continue;
  1898. }
  1899. NumRLInsts +=
  1900. SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
  1901. !FirstBG);
  1902. FirstBG = false;
  1903. }
  1904. LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
  1905. << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
  1906. << "\n\t\t\tisel using masking: " << NumAndInsts
  1907. << " using rotates: " << NumRLInsts << "\n");
  1908. // When we'd use andi/andis, we bias toward using the rotates (andi only
  1909. // has a record form, and is cracked on POWER cores). However, when using
  1910. // general 64-bit constant formation, bias toward the constant form,
  1911. // because that exposes more opportunities for CSE.
  1912. if (NumAndInsts > NumRLInsts)
  1913. continue;
1914. // When merging multiple bit groups, an OR instruction is needed to combine
1915. // them. But when a rotate is used, rldimi can insert the rotated value into
1916. // any register, so the OR can be avoided.
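// Illustrative example (not from the source): merging two groups with plain
// masking costs something like (or (and x, M1), (and y, M2)), i.e. an extra
// OR, whereas the rotate path can use a rotate-and-insert (rldimi) to place
// y's rotated bits straight into the partial result. That is why a tie in
// instruction count favors the rotates here.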
  1917. if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
  1918. continue;
  1919. LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
  1920. if (InstCnt) *InstCnt += NumAndInsts;
  1921. SDValue VRot;
  1922. // We actually need to generate a rotation if we have a non-zero rotation
  1923. // factor or, in the Repl32 case, if we care about any of the
1924. // higher-order replicated bits. In the latter case, we generate a wrap-around
1925. // mask so that it actually includes the entire 64 bits.
  1926. if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
  1927. VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
  1928. VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
  1929. else
  1930. VRot = VRI.V;
  1931. SDValue TotalVal;
  1932. if (Use32BitInsts) {
  1933. assert((ANDIMask != 0 || ANDISMask != 0) &&
  1934. "No set bits in mask when using 32-bit ands for 64-bit value");
  1935. SDValue ANDIVal, ANDISVal;
  1936. if (ANDIMask != 0)
  1937. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
  1938. ExtendToInt64(VRot, dl),
  1939. getI32Imm(ANDIMask, dl)),
  1940. 0);
  1941. if (ANDISMask != 0)
  1942. ANDISVal =
  1943. SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
  1944. ExtendToInt64(VRot, dl),
  1945. getI32Imm(ANDISMask, dl)),
  1946. 0);
  1947. if (!ANDIVal)
  1948. TotalVal = ANDISVal;
  1949. else if (!ANDISVal)
  1950. TotalVal = ANDIVal;
  1951. else
  1952. TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  1953. ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
  1954. } else {
  1955. TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
  1956. TotalVal =
  1957. SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
  1958. ExtendToInt64(VRot, dl), TotalVal),
  1959. 0);
  1960. }
  1961. if (!Res)
  1962. Res = TotalVal;
  1963. else
  1964. Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  1965. ExtendToInt64(Res, dl), TotalVal),
  1966. 0);
  1967. // Now, remove all groups with this underlying value and rotation
  1968. // factor.
  1969. eraseMatchingBitGroups(MatchingBG);
  1970. }
  1971. }
  1972. // Instruction selection for the 64-bit case.
  1973. SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
  1974. SDLoc dl(N);
  1975. SDValue Res;
  1976. if (InstCnt) *InstCnt = 0;
  1977. // Take care of cases that should use andi/andis first.
  1978. SelectAndParts64(dl, Res, InstCnt);
  1979. // If we've not yet selected a 'starting' instruction, and we have no zeros
  1980. // to fill in, select the (Value, RLAmt) with the highest priority (largest
  1981. // number of groups), and start with this rotated value.
  1982. if ((!NeedMask || LateMask) && !Res) {
  1983. // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
  1984. // groups will come first, and so the VRI representing the largest number
1985. // of groups might not be first (it might be the first Repl32 group).
  1986. unsigned MaxGroupsIdx = 0;
  1987. if (!ValueRotsVec[0].Repl32) {
  1988. for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
  1989. if (ValueRotsVec[i].Repl32) {
  1990. if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
  1991. MaxGroupsIdx = i;
  1992. break;
  1993. }
  1994. }
  1995. ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
  1996. bool NeedsRotate = false;
  1997. if (VRI.RLAmt) {
  1998. NeedsRotate = true;
  1999. } else if (VRI.Repl32) {
  2000. for (auto &BG : BitGroups) {
  2001. if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
  2002. BG.Repl32 != VRI.Repl32)
  2003. continue;
  2004. // We don't need a rotate if the bit group is confined to the lower
  2005. // 32 bits.
2006. if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx)
  2007. continue;
  2008. NeedsRotate = true;
  2009. break;
  2010. }
  2011. }
  2012. if (NeedsRotate)
  2013. Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
  2014. VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
  2015. InstCnt);
  2016. else
  2017. Res = VRI.V;
  2018. // Now, remove all groups with this underlying value and rotation factor.
  2019. if (Res)
  2020. eraseMatchingBitGroups([VRI](const BitGroup &BG) {
  2021. return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
  2022. BG.Repl32 == VRI.Repl32;
  2023. });
  2024. }
  2025. // Because 64-bit rotates are more flexible than inserts, we might have a
  2026. // preference regarding which one we do first (to save one instruction).
  2027. if (!Res)
  2028. for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
  2029. if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
  2030. false) <
  2031. SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
  2032. true)) {
  2033. if (I != BitGroups.begin()) {
  2034. BitGroup BG = *I;
  2035. BitGroups.erase(I);
  2036. BitGroups.insert(BitGroups.begin(), BG);
  2037. }
  2038. break;
  2039. }
  2040. }
  2041. // Insert the other groups (one at a time).
  2042. for (auto &BG : BitGroups) {
  2043. if (!Res)
  2044. Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
  2045. BG.EndIdx, InstCnt);
  2046. else
  2047. Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
  2048. BG.StartIdx, BG.EndIdx, InstCnt);
  2049. }
  2050. if (LateMask) {
  2051. uint64_t Mask = getZerosMask();
  2052. // We can use the 32-bit andi/andis technique if the mask does not
  2053. // require any higher-order bits. This can save an instruction compared
  2054. // to always using the general 64-bit technique.
  2055. bool Use32BitInsts = isUInt<32>(Mask);
  2056. // Compute the masks for andi/andis that would be necessary.
  2057. unsigned ANDIMask = (Mask & UINT16_MAX),
  2058. ANDISMask = (Mask >> 16) & UINT16_MAX;
  2059. if (Use32BitInsts) {
  2060. assert((ANDIMask != 0 || ANDISMask != 0) &&
  2061. "No set bits in mask when using 32-bit ands for 64-bit value");
  2062. if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
  2063. (unsigned) (ANDISMask != 0) +
  2064. (unsigned) (ANDIMask != 0 && ANDISMask != 0);
  2065. SDValue ANDIVal, ANDISVal;
  2066. if (ANDIMask != 0)
  2067. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
  2068. ExtendToInt64(Res, dl),
  2069. getI32Imm(ANDIMask, dl)),
  2070. 0);
  2071. if (ANDISMask != 0)
  2072. ANDISVal =
  2073. SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
  2074. ExtendToInt64(Res, dl),
  2075. getI32Imm(ANDISMask, dl)),
  2076. 0);
  2077. if (!ANDIVal)
  2078. Res = ANDISVal;
  2079. else if (!ANDISVal)
  2080. Res = ANDIVal;
  2081. else
  2082. Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  2083. ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
  2084. } else {
  2085. unsigned NumOfSelectInsts = 0;
  2086. SDValue MaskVal =
  2087. SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
  2088. Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
  2089. ExtendToInt64(Res, dl), MaskVal),
  2090. 0);
  2091. if (InstCnt)
  2092. *InstCnt += NumOfSelectInsts + /* and */ 1;
  2093. }
  2094. }
  2095. return Res.getNode();
  2096. }
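// Rough end-to-end example (assumed, for illustration): for a value such as
// i64 %r = (or (shl %a, 8), (srl %b, 56)) the selector forms one bit group per
// source value (both with rotation amount 8), starts from the value whose
// groups cover the most bits using a rotate-and-mask (e.g. an rldicl/rldicr),
// inserts the remaining group with rldimi, and applies any zero mask at the
// end when late masking was requested.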
  2097. SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
  2098. // Fill in BitGroups.
  2099. collectBitGroups(LateMask);
  2100. if (BitGroups.empty())
  2101. return nullptr;
  2102. // For 64-bit values, figure out when we can use 32-bit instructions.
  2103. if (Bits.size() == 64)
  2104. assignRepl32BitGroups();
  2105. // Fill in ValueRotsVec.
  2106. collectValueRotInfo();
  2107. if (Bits.size() == 32) {
  2108. return Select32(N, LateMask, InstCnt);
  2109. } else {
  2110. assert(Bits.size() == 64 && "Not 64 bits here?");
  2111. return Select64(N, LateMask, InstCnt);
  2112. }
  2113. return nullptr;
  2114. }
  2115. void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
  2116. erase_if(BitGroups, F);
  2117. }
  2118. SmallVector<ValueBit, 64> Bits;
  2119. bool NeedMask = false;
  2120. SmallVector<unsigned, 64> RLAmt;
  2121. SmallVector<BitGroup, 16> BitGroups;
  2122. DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
  2123. SmallVector<ValueRotInfo, 16> ValueRotsVec;
  2124. SelectionDAG *CurDAG = nullptr;
  2125. public:
  2126. BitPermutationSelector(SelectionDAG *DAG)
  2127. : CurDAG(DAG) {}
  2128. // Here we try to match complex bit permutations into a set of
  2129. // rotate-and-shift/shift/and/or instructions, using a set of heuristics
  2130. // known to produce optimal code for common cases (like i32 byte swapping).
  2131. SDNode *Select(SDNode *N) {
  2132. Memoizer.clear();
  2133. auto Result =
  2134. getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
  2135. if (!Result.first)
  2136. return nullptr;
  2137. Bits = std::move(*Result.second);
  2138. LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
  2139. " selection for: ");
  2140. LLVM_DEBUG(N->dump(CurDAG));
2141. // Fill in RLAmt and set NeedMask.
  2142. computeRotationAmounts();
  2143. if (!NeedMask)
  2144. return Select(N, false);
  2145. // We currently have two techniques for handling results with zeros: early
  2146. // masking (the default) and late masking. Late masking is sometimes more
  2147. // efficient, but because the structure of the bit groups is different, it
  2148. // is hard to tell without generating both and comparing the results. With
  2149. // late masking, we ignore zeros in the resulting value when inserting each
  2150. // set of bit groups, and then mask in the zeros at the end. With early
  2151. // masking, we only insert the non-zero parts of the result at every step.
  2152. unsigned InstCnt = 0, InstCntLateMask = 0;
  2153. LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
  2154. SDNode *RN = Select(N, false, &InstCnt);
  2155. LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
  2156. LLVM_DEBUG(dbgs() << "\tLate masking:\n");
  2157. SDNode *RNLM = Select(N, true, &InstCntLateMask);
  2158. LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
  2159. << " instructions\n");
  2160. if (InstCnt <= InstCntLateMask) {
  2161. LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
  2162. return RN;
  2163. }
  2164. LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
  2165. return RNLM;
  2166. }
  2167. };
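// A minimal usage sketch (assumed, based on how selectors of this kind are
// typically driven from the DAG-to-DAG pass; the exact call site and hook
// name may differ):
//
//   BitPermutationSelector BPermSel(CurDAG);
//   if (SDNode *New = BPermSel.Select(N)) {
//     ReplaceNode(N, New);
//     return true;
//   }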
  2168. class IntegerCompareEliminator {
  2169. SelectionDAG *CurDAG;
  2170. PPCDAGToDAGISel *S;
  2171. // Conversion type for interpreting results of a 32-bit instruction as
  2172. // a 64-bit value or vice versa.
  2173. enum ExtOrTruncConversion { Ext, Trunc };
  2174. // Modifiers to guide how an ISD::SETCC node's result is to be computed
  2175. // in a GPR.
  2176. // ZExtOrig - use the original condition code, zero-extend value
  2177. // ZExtInvert - invert the condition code, zero-extend value
  2178. // SExtOrig - use the original condition code, sign-extend value
  2179. // SExtInvert - invert the condition code, sign-extend value
  2180. enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
  2181. // Comparisons against zero to emit GPR code sequences for. Each of these
  2182. // sequences may need to be emitted for two or more equivalent patterns.
  2183. // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
  2184. // matters as well as the extension type: sext (-1/0), zext (1/0).
  2185. // GEZExt - (zext (LHS >= 0))
  2186. // GESExt - (sext (LHS >= 0))
  2187. // LEZExt - (zext (LHS <= 0))
  2188. // LESExt - (sext (LHS <= 0))
  2189. enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
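// Concrete example (illustrative): for LHS == -5, GEZExt yields 0, GESExt
// yields 0, LEZExt yields 1, and LESExt yields -1; for LHS == 0 all four
// yield 1 (zext) or -1 (sext) according to their extension kind.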
  2190. SDNode *tryEXTEND(SDNode *N);
  2191. SDNode *tryLogicOpOfCompares(SDNode *N);
  2192. SDValue computeLogicOpInGPR(SDValue LogicOp);
  2193. SDValue signExtendInputIfNeeded(SDValue Input);
  2194. SDValue zeroExtendInputIfNeeded(SDValue Input);
  2195. SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
  2196. SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
  2197. ZeroCompare CmpTy);
  2198. SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2199. int64_t RHSValue, SDLoc dl);
  2200. SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2201. int64_t RHSValue, SDLoc dl);
  2202. SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2203. int64_t RHSValue, SDLoc dl);
  2204. SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2205. int64_t RHSValue, SDLoc dl);
  2206. SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
  2207. public:
  2208. IntegerCompareEliminator(SelectionDAG *DAG,
  2209. PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
  2210. assert(CurDAG->getTargetLoweringInfo()
  2211. .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
  2212. "Only expecting to use this on 64 bit targets.");
  2213. }
  2214. SDNode *Select(SDNode *N) {
  2215. if (CmpInGPR == ICGPR_None)
  2216. return nullptr;
  2217. switch (N->getOpcode()) {
  2218. default: break;
  2219. case ISD::ZERO_EXTEND:
  2220. if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
  2221. CmpInGPR == ICGPR_SextI64)
  2222. return nullptr;
  2223. LLVM_FALLTHROUGH;
  2224. case ISD::SIGN_EXTEND:
  2225. if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
  2226. CmpInGPR == ICGPR_ZextI64)
  2227. return nullptr;
  2228. return tryEXTEND(N);
  2229. case ISD::AND:
  2230. case ISD::OR:
  2231. case ISD::XOR:
  2232. return tryLogicOpOfCompares(N);
  2233. }
  2234. return nullptr;
  2235. }
  2236. };
  2237. static bool isLogicOp(unsigned Opc) {
  2238. return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
  2239. }
  2240. // The obvious case for wanting to keep the value in a GPR. Namely, the
  2241. // result of the comparison is actually needed in a GPR.
  2242. SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
  2243. assert((N->getOpcode() == ISD::ZERO_EXTEND ||
  2244. N->getOpcode() == ISD::SIGN_EXTEND) &&
  2245. "Expecting a zero/sign extend node!");
  2246. SDValue WideRes;
  2247. // If we are zero-extending the result of a logical operation on i1
  2248. // values, we can keep the values in GPRs.
  2249. if (isLogicOp(N->getOperand(0).getOpcode()) &&
  2250. N->getOperand(0).getValueType() == MVT::i1 &&
  2251. N->getOpcode() == ISD::ZERO_EXTEND)
  2252. WideRes = computeLogicOpInGPR(N->getOperand(0));
  2253. else if (N->getOperand(0).getOpcode() != ISD::SETCC)
  2254. return nullptr;
  2255. else
  2256. WideRes =
  2257. getSETCCInGPR(N->getOperand(0),
  2258. N->getOpcode() == ISD::SIGN_EXTEND ?
  2259. SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
  2260. if (!WideRes)
  2261. return nullptr;
  2262. SDLoc dl(N);
  2263. bool Input32Bit = WideRes.getValueType() == MVT::i32;
  2264. bool Output32Bit = N->getValueType(0) == MVT::i32;
  2265. NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
  2266. NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
  2267. SDValue ConvOp = WideRes;
  2268. if (Input32Bit != Output32Bit)
  2269. ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
  2270. ExtOrTruncConversion::Trunc);
  2271. return ConvOp.getNode();
  2272. }
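// Example of the intent (illustrative, not an exact DAG dump): for
// i64 %r = (zext (setcc i32 %a, i32 %b, seteq)), the comparison result is
// computed directly in a GPR (see get32BitZExtCompare) and then reinterpreted
// as an i64 via addExtOrTrunc, rather than being materialized from a
// condition-register field.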
  2273. // Attempt to perform logical operations on the results of comparisons while
  2274. // keeping the values in GPRs. Without doing so, these would end up being
  2275. // lowered to CR-logical operations which suffer from significant latency and
  2276. // low ILP.
  2277. SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
  2278. if (N->getValueType(0) != MVT::i1)
  2279. return nullptr;
  2280. assert(isLogicOp(N->getOpcode()) &&
  2281. "Expected a logic operation on setcc results.");
  2282. SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
  2283. if (!LoweredLogical)
  2284. return nullptr;
  2285. SDLoc dl(N);
  2286. bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
  2287. unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
  2288. SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  2289. SDValue LHS = LoweredLogical.getOperand(0);
  2290. SDValue RHS = LoweredLogical.getOperand(1);
  2291. SDValue WideOp;
  2292. SDValue OpToConvToRecForm;
  2293. // Look through any 32-bit to 64-bit implicit extend nodes to find the
  2294. // opcode that is input to the XORI.
  2295. if (IsBitwiseNegate &&
  2296. LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
  2297. OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
  2298. else if (IsBitwiseNegate)
  2299. // If the input to the XORI isn't an extension, that's what we're after.
  2300. OpToConvToRecForm = LoweredLogical.getOperand(0);
  2301. else
  2302. // If this is not an XORI, it is a reg-reg logical op and we can convert
  2303. // it to record-form.
  2304. OpToConvToRecForm = LoweredLogical;
  2305. // Get the record-form version of the node we're looking to use to get the
  2306. // CR result from.
  2307. uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
  2308. int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
  2309. // Convert the right node to record-form. This is either the logical we're
  2310. // looking at or it is the input node to the negation (if we're looking at
  2311. // a bitwise negation).
  2312. if (NewOpc != -1 && IsBitwiseNegate) {
  2313. // The input to the XORI has a record-form. Use it.
  2314. assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
  2315. "Expected a PPC::XORI8 only for bitwise negation.");
  2316. // Emit the record-form instruction.
  2317. std::vector<SDValue> Ops;
  2318. for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
  2319. Ops.push_back(OpToConvToRecForm.getOperand(i));
  2320. WideOp =
  2321. SDValue(CurDAG->getMachineNode(NewOpc, dl,
  2322. OpToConvToRecForm.getValueType(),
  2323. MVT::Glue, Ops), 0);
  2324. } else {
  2325. assert((NewOpc != -1 || !IsBitwiseNegate) &&
  2326. "No record form available for AND8/OR8/XOR8?");
  2327. WideOp =
  2328. SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
  2329. dl, MVT::i64, MVT::Glue, LHS, RHS),
  2330. 0);
  2331. }
  2332. // Select this node to a single bit from CR0 set by the record-form node
  2333. // just created. For bitwise negation, use the EQ bit which is the equivalent
  2334. // of negating the result (i.e. it is a bit set when the result of the
  2335. // operation is zero).
  2336. SDValue SRIdxVal =
  2337. CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
  2338. SDValue CRBit =
  2339. SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
  2340. MVT::i1, CR0Reg, SRIdxVal,
  2341. WideOp.getValue(1)), 0);
  2342. return CRBit.getNode();
  2343. }
  2344. // Lower a logical operation on i1 values into a GPR sequence if possible.
  2345. // The result can be kept in a GPR if requested.
  2346. // Three types of inputs can be handled:
  2347. // - SETCC
  2348. // - TRUNCATE
  2349. // - Logical operation (AND/OR/XOR)
  2350. // There is also a special case that is handled (namely a complement operation
  2351. // achieved with xor %a, -1).
  2352. SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
  2353. assert(isLogicOp(LogicOp.getOpcode()) &&
  2354. "Can only handle logic operations here.");
  2355. assert(LogicOp.getValueType() == MVT::i1 &&
  2356. "Can only handle logic operations on i1 values here.");
  2357. SDLoc dl(LogicOp);
  2358. SDValue LHS, RHS;
  2359. // Special case: xor %a, -1
  2360. bool IsBitwiseNegation = isBitwiseNot(LogicOp);
  2361. // Produces a GPR sequence for each operand of the binary logic operation.
  2362. // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
  2363. // the value in a GPR and for logic operations, it will recursively produce
  2364. // a GPR sequence for the operation.
  2365. auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
  2366. unsigned OperandOpcode = Operand.getOpcode();
  2367. if (OperandOpcode == ISD::SETCC)
  2368. return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
  2369. else if (OperandOpcode == ISD::TRUNCATE) {
  2370. SDValue InputOp = Operand.getOperand(0);
  2371. EVT InVT = InputOp.getValueType();
  2372. return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
  2373. PPC::RLDICL, dl, InVT, InputOp,
  2374. S->getI64Imm(0, dl),
  2375. S->getI64Imm(63, dl)), 0);
  2376. } else if (isLogicOp(OperandOpcode))
  2377. return computeLogicOpInGPR(Operand);
  2378. return SDValue();
  2379. };
  2380. LHS = getLogicOperand(LogicOp.getOperand(0));
  2381. RHS = getLogicOperand(LogicOp.getOperand(1));
  2382. // If a GPR sequence can't be produced for the LHS we can't proceed.
  2383. // Not producing a GPR sequence for the RHS is only a problem if this isn't
  2384. // a bitwise negation operation.
  2385. if (!LHS || (!RHS && !IsBitwiseNegation))
  2386. return SDValue();
  2387. NumLogicOpsOnComparison++;
  2388. // We will use the inputs as 64-bit values.
  2389. if (LHS.getValueType() == MVT::i32)
  2390. LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
  2391. if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
  2392. RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
  2393. unsigned NewOpc;
  2394. switch (LogicOp.getOpcode()) {
  2395. default: llvm_unreachable("Unknown logic operation.");
  2396. case ISD::AND: NewOpc = PPC::AND8; break;
  2397. case ISD::OR: NewOpc = PPC::OR8; break;
  2398. case ISD::XOR: NewOpc = PPC::XOR8; break;
  2399. }
  2400. if (IsBitwiseNegation) {
  2401. RHS = S->getI64Imm(1, dl);
  2402. NewOpc = PPC::XORI8;
  2403. }
  2404. return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
  2405. }
  2406. /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
  2407. /// Otherwise just reinterpret it as a 64-bit value.
  2408. /// Useful when emitting comparison code for 32-bit values without using
  2409. /// the compare instruction (which only considers the lower 32-bits).
  2410. SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
  2411. assert(Input.getValueType() == MVT::i32 &&
  2412. "Can only sign-extend 32-bit values here.");
  2413. unsigned Opc = Input.getOpcode();
  2414. // The value was sign extended and then truncated to 32-bits. No need to
  2415. // sign extend it again.
  2416. if (Opc == ISD::TRUNCATE &&
  2417. (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
  2418. Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
  2419. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2420. LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  2421. // The input is a sign-extending load. All ppc sign-extending loads
  2422. // sign-extend to the full 64-bits.
  2423. if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
  2424. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2425. ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  2426. // We don't sign-extend constants.
  2427. if (InputConst)
  2428. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2429. SDLoc dl(Input);
  2430. SignExtensionsAdded++;
  2431. return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
  2432. MVT::i64, Input), 0);
  2433. }
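// For instance (illustrative): a value produced by a sign-extending load
// (lha/lwa) is already sign-extended to 64 bits and is just reinterpreted,
// whereas a plain i32 computation gets an extsw (EXTSW_32_64) so that the
// high word is well defined.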
  2434. /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
  2435. /// Otherwise just reinterpret it as a 64-bit value.
  2436. /// Useful when emitting comparison code for 32-bit values without using
  2437. /// the compare instruction (which only considers the lower 32-bits).
  2438. SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
  2439. assert(Input.getValueType() == MVT::i32 &&
  2440. "Can only zero-extend 32-bit values here.");
  2441. unsigned Opc = Input.getOpcode();
2442. // The only conditions under which we can omit the actual extend instruction:
  2443. // - The value is a positive constant
  2444. // - The value comes from a load that isn't a sign-extending load
  2445. // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
  2446. bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
  2447. (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
  2448. Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
  2449. if (IsTruncateOfZExt)
  2450. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2451. ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  2452. if (InputConst && InputConst->getSExtValue() >= 0)
  2453. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2454. LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  2455. // The input is a load that doesn't sign-extend (it will be zero-extended).
  2456. if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
  2457. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2458. // None of the above, need to zero-extend.
  2459. SDLoc dl(Input);
  2460. ZeroExtensionsAdded++;
  2461. return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
  2462. S->getI64Imm(0, dl),
  2463. S->getI64Imm(32, dl)), 0);
  2464. }
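// For instance (illustrative): an i32 loaded with lwz/lhz is implicitly
// zero-extended and needs no extra instruction, whereas a plain i32
// computation has its upper word cleared with rldicl value, 0, 32
// (RLDICL_32_64).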
  2465. // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
  2466. // course not actual zero/sign extensions that will generate machine code,
  2467. // they're just a way to reinterpret a 32 bit value in a register as a
  2468. // 64 bit value and vice-versa.
  2469. SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
  2470. ExtOrTruncConversion Conv) {
  2471. SDLoc dl(NatWidthRes);
  2472. // For reinterpreting 32-bit values as 64 bit values, we generate
  2473. // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
  2474. if (Conv == ExtOrTruncConversion::Ext) {
  2475. SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
  2476. SDValue SubRegIdx =
  2477. CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  2478. return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
  2479. ImDef, NatWidthRes, SubRegIdx), 0);
  2480. }
  2481. assert(Conv == ExtOrTruncConversion::Trunc &&
  2482. "Unknown convertion between 32 and 64 bit values.");
  2483. // For reinterpreting 64-bit values as 32-bit values, we just need to
  2484. // EXTRACT_SUBREG (i.e. extract the low word).
  2485. SDValue SubRegIdx =
  2486. CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  2487. return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
  2488. NatWidthRes, SubRegIdx), 0);
  2489. }
  2490. // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
  2491. // Handle both zero-extensions and sign-extensions.
  2492. SDValue
  2493. IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
  2494. ZeroCompare CmpTy) {
  2495. EVT InVT = LHS.getValueType();
  2496. bool Is32Bit = InVT == MVT::i32;
  2497. SDValue ToExtend;
  2498. // Produce the value that needs to be either zero or sign extended.
  2499. switch (CmpTy) {
  2500. case ZeroCompare::GEZExt:
  2501. case ZeroCompare::GESExt:
  2502. ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
  2503. dl, InVT, LHS, LHS), 0);
  2504. break;
  2505. case ZeroCompare::LEZExt:
  2506. case ZeroCompare::LESExt: {
  2507. if (Is32Bit) {
  2508. // Upper 32 bits cannot be undefined for this sequence.
  2509. LHS = signExtendInputIfNeeded(LHS);
  2510. SDValue Neg =
  2511. SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
  2512. ToExtend =
  2513. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2514. Neg, S->getI64Imm(1, dl),
  2515. S->getI64Imm(63, dl)), 0);
  2516. } else {
  2517. SDValue Addi =
  2518. SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
  2519. S->getI64Imm(~0ULL, dl)), 0);
  2520. ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  2521. Addi, LHS), 0);
  2522. }
  2523. break;
  2524. }
  2525. }
  2526. // For 64-bit sequences, the extensions are the same for the GE/LE cases.
  2527. if (!Is32Bit &&
  2528. (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
  2529. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2530. ToExtend, S->getI64Imm(1, dl),
  2531. S->getI64Imm(63, dl)), 0);
  2532. if (!Is32Bit &&
  2533. (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
  2534. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
  2535. S->getI64Imm(63, dl)), 0);
  2536. assert(Is32Bit && "Should have handled the 32-bit sequences above.");
  2537. // For 32-bit sequences, the extensions differ between GE/LE cases.
  2538. switch (CmpTy) {
  2539. case ZeroCompare::GEZExt: {
  2540. SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
  2541. S->getI32Imm(31, dl) };
  2542. return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  2543. ShiftOps), 0);
  2544. }
  2545. case ZeroCompare::GESExt:
  2546. return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
  2547. S->getI32Imm(31, dl)), 0);
  2548. case ZeroCompare::LEZExt:
  2549. return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
  2550. S->getI32Imm(1, dl)), 0);
  2551. case ZeroCompare::LESExt:
  2552. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
  2553. S->getI32Imm(-1, dl)), 0);
  2554. }
2555. // The switch above covers all the enumerators, so it intentionally has no
2556. // default clause (one would only trigger a covered-switch compiler warning).
  2557. llvm_unreachable("Unknown zero-comparison type.");
  2558. }
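// Illustrative 32-bit GEZExt expansion from the function above,
// i.e. (zext (%a >= 0)):
//   nor    r, %a, %a        ; r = ~%a
//   rlwinm r, r, 1, 31, 31  ; r = (~%a) >> 31, i.e. 1 iff %a >= 0
// The other cases differ only in the final extension step selected by the
// switches above.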
  2559. /// Produces a zero-extended result of comparing two 32-bit values according to
  2560. /// the passed condition code.
  2561. SDValue
  2562. IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
  2563. ISD::CondCode CC,
  2564. int64_t RHSValue, SDLoc dl) {
  2565. if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
  2566. CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
  2567. return SDValue();
  2568. bool IsRHSZero = RHSValue == 0;
  2569. bool IsRHSOne = RHSValue == 1;
  2570. bool IsRHSNegOne = RHSValue == -1LL;
  2571. switch (CC) {
  2572. default: return SDValue();
  2573. case ISD::SETEQ: {
  2574. // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
  2575. // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
  2576. SDValue Xor = IsRHSZero ? LHS :
  2577. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  2578. SDValue Clz =
  2579. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
  2580. SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
  2581. S->getI32Imm(31, dl) };
  2582. return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  2583. ShiftOps), 0);
  2584. }
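// Why this works (illustrative): when %a == %b the xor is 0 and cntlzw
// returns 32 (0b100000), so shifting right by 5 leaves exactly 1; for any
// non-zero xor, cntlzw is in [0, 31] and the shift produces 0.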
  2585. case ISD::SETNE: {
  2586. // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
  2587. // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
  2588. SDValue Xor = IsRHSZero ? LHS :
  2589. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  2590. SDValue Clz =
  2591. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
  2592. SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
  2593. S->getI32Imm(31, dl) };
  2594. SDValue Shift =
  2595. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
  2596. return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
  2597. S->getI32Imm(1, dl)), 0);
  2598. }
  2599. case ISD::SETGE: {
  2600. // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
  2601. // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
2602. if (IsRHSZero)
  2603. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  2604. // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
  2605. // by swapping inputs and falling through.
  2606. std::swap(LHS, RHS);
  2607. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  2608. IsRHSZero = RHSConst && RHSConst->isNullValue();
  2609. LLVM_FALLTHROUGH;
  2610. }
  2611. case ISD::SETLE: {
  2612. if (CmpInGPR == ICGPR_NonExtIn)
  2613. return SDValue();
  2614. // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
  2615. // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
2616. if (IsRHSZero) {
  2617. if (CmpInGPR == ICGPR_NonExtIn)
  2618. return SDValue();
  2619. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  2620. }
  2621. // The upper 32-bits of the register can't be undefined for this sequence.
  2622. LHS = signExtendInputIfNeeded(LHS);
  2623. RHS = signExtendInputIfNeeded(RHS);
  2624. SDValue Sub =
  2625. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
  2626. SDValue Shift =
  2627. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
  2628. S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
  2629. 0);
  2630. return
  2631. SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
  2632. MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
  2633. }
  2634. case ISD::SETGT: {
  2635. // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
  2636. // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
  2637. // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
2638. // Handle SETGT -1 (which is equivalent to SETGE 0).
  2639. if (IsRHSNegOne)
  2640. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  2641. if (IsRHSZero) {
  2642. if (CmpInGPR == ICGPR_NonExtIn)
  2643. return SDValue();
  2644. // The upper 32-bits of the register can't be undefined for this sequence.
  2645. LHS = signExtendInputIfNeeded(LHS);
  2646. RHS = signExtendInputIfNeeded(RHS);
  2647. SDValue Neg =
  2648. SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
  2649. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2650. Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
  2651. }
  2652. // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
  2653. // (%b < %a) by swapping inputs and falling through.
  2654. std::swap(LHS, RHS);
  2655. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  2656. IsRHSZero = RHSConst && RHSConst->isNullValue();
  2657. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  2658. LLVM_FALLTHROUGH;
  2659. }
  2660. case ISD::SETLT: {
  2661. // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
  2662. // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
  2663. // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
  2664. // Handle SETLT 1 (which is equivalent to SETLE 0).
  2665. if (IsRHSOne) {
  2666. if (CmpInGPR == ICGPR_NonExtIn)
  2667. return SDValue();
  2668. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  2669. }
  2670. if (IsRHSZero) {
  2671. SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
  2672. S->getI32Imm(31, dl) };
  2673. return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  2674. ShiftOps), 0);
  2675. }
  2676. if (CmpInGPR == ICGPR_NonExtIn)
  2677. return SDValue();
  2678. // The upper 32-bits of the register can't be undefined for this sequence.
  2679. LHS = signExtendInputIfNeeded(LHS);
  2680. RHS = signExtendInputIfNeeded(RHS);
  2681. SDValue SUBFNode =
  2682. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  2683. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2684. SUBFNode, S->getI64Imm(1, dl),
  2685. S->getI64Imm(63, dl)), 0);
  2686. }
  2687. case ISD::SETUGE:
  2688. // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
  2689. // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
  2690. std::swap(LHS, RHS);
  2691. LLVM_FALLTHROUGH;
  2692. case ISD::SETULE: {
  2693. if (CmpInGPR == ICGPR_NonExtIn)
  2694. return SDValue();
  2695. // The upper 32-bits of the register can't be undefined for this sequence.
  2696. LHS = zeroExtendInputIfNeeded(LHS);
  2697. RHS = zeroExtendInputIfNeeded(RHS);
  2698. SDValue Subtract =
  2699. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
  2700. SDValue SrdiNode =
  2701. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2702. Subtract, S->getI64Imm(1, dl),
  2703. S->getI64Imm(63, dl)), 0);
  2704. return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
  2705. S->getI32Imm(1, dl)), 0);
  2706. }
  2707. case ISD::SETUGT:
  2708. // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
  2709. // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
  2710. std::swap(LHS, RHS);
  2711. LLVM_FALLTHROUGH;
  2712. case ISD::SETULT: {
  2713. if (CmpInGPR == ICGPR_NonExtIn)
  2714. return SDValue();
  2715. // The upper 32-bits of the register can't be undefined for this sequence.
  2716. LHS = zeroExtendInputIfNeeded(LHS);
  2717. RHS = zeroExtendInputIfNeeded(RHS);
  2718. SDValue Subtract =
  2719. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  2720. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2721. Subtract, S->getI64Imm(1, dl),
  2722. S->getI64Imm(63, dl)), 0);
  2723. }
  2724. }
  2725. }
  2726. /// Produces a sign-extended result of comparing two 32-bit values according to
  2727. /// the passed condition code.
  2728. SDValue
  2729. IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
  2730. ISD::CondCode CC,
  2731. int64_t RHSValue, SDLoc dl) {
  2732. if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
  2733. CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
  2734. return SDValue();
  2735. bool IsRHSZero = RHSValue == 0;
  2736. bool IsRHSOne = RHSValue == 1;
  2737. bool IsRHSNegOne = RHSValue == -1LL;
  2738. switch (CC) {
  2739. default: return SDValue();
  2740. case ISD::SETEQ: {
  2741. // (sext (setcc %a, %b, seteq)) ->
  2742. // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
  2743. // (sext (setcc %a, 0, seteq)) ->
  2744. // (ashr (shl (ctlz %a), 58), 63)
  2745. SDValue CountInput = IsRHSZero ? LHS :
  2746. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  2747. SDValue Cntlzw =
  2748. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
  2749. SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
  2750. S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
  2751. SDValue Slwi =
  2752. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
  2753. return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
  2754. }
  2755. case ISD::SETNE: {
  2756. // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
  2757. // flip the bit, finally take 2's complement.
  2758. // (sext (setcc %a, %b, setne)) ->
  2759. // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
  2760. // Same as above, but the first xor is not needed.
  2761. // (sext (setcc %a, 0, setne)) ->
  2762. // (neg (xor (lshr (ctlz %a), 5), 1))
  2763. SDValue Xor = IsRHSZero ? LHS :
  2764. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  2765. SDValue Clz =
  2766. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
  2767. SDValue ShiftOps[] =
  2768. { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
  2769. SDValue Shift =
  2770. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
  2771. SDValue Xori =
  2772. SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
  2773. S->getI32Imm(1, dl)), 0);
  2774. return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
  2775. }
  2776. case ISD::SETGE: {
  2777. // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
  2778. // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
  2779. if (IsRHSZero)
  2780. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  2781. // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
  2782. // by swapping inputs and falling through.
  2783. std::swap(LHS, RHS);
  2784. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  2785. IsRHSZero = RHSConst && RHSConst->isNullValue();
  2786. LLVM_FALLTHROUGH;
  2787. }
  2788. case ISD::SETLE: {
  2789. if (CmpInGPR == ICGPR_NonExtIn)
  2790. return SDValue();
2791. // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
  2792. // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
  2793. if (IsRHSZero)
  2794. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  2795. // The upper 32-bits of the register can't be undefined for this sequence.
  2796. LHS = signExtendInputIfNeeded(LHS);
  2797. RHS = signExtendInputIfNeeded(RHS);
  2798. SDValue SUBFNode =
  2799. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
  2800. LHS, RHS), 0);
  2801. SDValue Srdi =
  2802. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2803. SUBFNode, S->getI64Imm(1, dl),
  2804. S->getI64Imm(63, dl)), 0);
  2805. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
  2806. S->getI32Imm(-1, dl)), 0);
  2807. }
  2808. case ISD::SETGT: {
  2809. // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
  2810. // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
  2811. // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
  2812. if (IsRHSNegOne)
  2813. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  2814. if (IsRHSZero) {
  2815. if (CmpInGPR == ICGPR_NonExtIn)
  2816. return SDValue();
  2817. // The upper 32-bits of the register can't be undefined for this sequence.
  2818. LHS = signExtendInputIfNeeded(LHS);
  2819. RHS = signExtendInputIfNeeded(RHS);
  2820. SDValue Neg =
  2821. SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
  2822. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
  2823. S->getI64Imm(63, dl)), 0);
  2824. }
  2825. // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
  2826. // (%b < %a) by swapping inputs and falling through.
  2827. std::swap(LHS, RHS);
  2828. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  2829. IsRHSZero = RHSConst && RHSConst->isNullValue();
  2830. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  2831. LLVM_FALLTHROUGH;
  2832. }
  2833. case ISD::SETLT: {
2834. // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
2835. // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
2836. // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
  2837. if (IsRHSOne) {
  2838. if (CmpInGPR == ICGPR_NonExtIn)
  2839. return SDValue();
  2840. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  2841. }
  2842. if (IsRHSZero)
  2843. return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
  2844. S->getI32Imm(31, dl)), 0);
  2845. if (CmpInGPR == ICGPR_NonExtIn)
  2846. return SDValue();
  2847. // The upper 32-bits of the register can't be undefined for this sequence.
  2848. LHS = signExtendInputIfNeeded(LHS);
  2849. RHS = signExtendInputIfNeeded(RHS);
  2850. SDValue SUBFNode =
  2851. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  2852. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  2853. SUBFNode, S->getI64Imm(63, dl)), 0);
  2854. }
  2855. case ISD::SETUGE:
  2856. // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
  2857. // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
  2858. std::swap(LHS, RHS);
  2859. LLVM_FALLTHROUGH;
  2860. case ISD::SETULE: {
  2861. if (CmpInGPR == ICGPR_NonExtIn)
  2862. return SDValue();
  2863. // The upper 32-bits of the register can't be undefined for this sequence.
  2864. LHS = zeroExtendInputIfNeeded(LHS);
  2865. RHS = zeroExtendInputIfNeeded(RHS);
  2866. SDValue Subtract =
  2867. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
  2868. SDValue Shift =
  2869. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
  2870. S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
  2871. 0);
  2872. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
  2873. S->getI32Imm(-1, dl)), 0);
  2874. }
  2875. case ISD::SETUGT:
  2876. // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
2877. // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
  2878. std::swap(LHS, RHS);
  2879. LLVM_FALLTHROUGH;
  2880. case ISD::SETULT: {
  2881. if (CmpInGPR == ICGPR_NonExtIn)
  2882. return SDValue();
  2883. // The upper 32-bits of the register can't be undefined for this sequence.
  2884. LHS = zeroExtendInputIfNeeded(LHS);
  2885. RHS = zeroExtendInputIfNeeded(RHS);
  2886. SDValue Subtract =
  2887. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  2888. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  2889. Subtract, S->getI64Imm(63, dl)), 0);
  2890. }
  2891. }
  2892. }
  2893. /// Produces a zero-extended result of comparing two 64-bit values according to
  2894. /// the passed condition code.
  2895. SDValue
  2896. IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
  2897. ISD::CondCode CC,
  2898. int64_t RHSValue, SDLoc dl) {
  2899. if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
  2900. CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
  2901. return SDValue();
  2902. bool IsRHSZero = RHSValue == 0;
  2903. bool IsRHSOne = RHSValue == 1;
  2904. bool IsRHSNegOne = RHSValue == -1LL;
  2905. switch (CC) {
  2906. default: return SDValue();
  2907. case ISD::SETEQ: {
  2908. // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
  2909. // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
  2910. SDValue Xor = IsRHSZero ? LHS :
  2911. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  2912. SDValue Clz =
  2913. SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
  2914. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
  2915. S->getI64Imm(58, dl),
  2916. S->getI64Imm(63, dl)), 0);
  2917. }
  2918. case ISD::SETNE: {
  2919. // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
  2920. // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
  2921. // {addcz.reg, addcz.CA} = (addcarry %a, -1)
  2922. // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
  2923. SDValue Xor = IsRHSZero ? LHS :
  2924. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  2925. SDValue AC =
  2926. SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
  2927. Xor, S->getI32Imm(~0U, dl)), 0);
  2928. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
  2929. Xor, AC.getValue(1)), 0);
  2930. }
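// Why this works (illustrative): 'addic r, x, -1' produces a carry exactly
// when x != 0 (adding 0xFFFF...FFFF overflows unless x is 0), and the subfe
// then materializes that carry bit as a 0/1 value in a GPR.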
  2931. case ISD::SETGE: {
  2932. // {subc.reg, subc.CA} = (subcarry %a, %b)
  2933. // (zext (setcc %a, %b, setge)) ->
  2934. // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
  2935. // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
  2936. if (IsRHSZero)
  2937. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  2938. std::swap(LHS, RHS);
  2939. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  2940. IsRHSZero = RHSConst && RHSConst->isNullValue();
  2941. LLVM_FALLTHROUGH;
  2942. }
  2943. case ISD::SETLE: {
  2944. // {subc.reg, subc.CA} = (subcarry %b, %a)
2945. // (zext (setcc %a, %b, setle)) ->
2946. // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
2947. // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
  2948. if (IsRHSZero)
  2949. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  2950. SDValue ShiftL =
  2951. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
  2952. S->getI64Imm(1, dl),
  2953. S->getI64Imm(63, dl)), 0);
  2954. SDValue ShiftR =
  2955. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
  2956. S->getI64Imm(63, dl)), 0);
  2957. SDValue SubtractCarry =
  2958. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  2959. LHS, RHS), 1);
  2960. return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
  2961. ShiftR, ShiftL, SubtractCarry), 0);
  2962. }
  2963. case ISD::SETGT: {
  2964. // {subc.reg, subc.CA} = (subcarry %b, %a)
  2965. // (zext (setcc %a, %b, setgt)) ->
  2966. // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
  2967. // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
  2968. if (IsRHSNegOne)
  2969. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  2970. if (IsRHSZero) {
  2971. SDValue Addi =
  2972. SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
  2973. S->getI64Imm(~0ULL, dl)), 0);
  2974. SDValue Nor =
  2975. SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
  2976. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
  2977. S->getI64Imm(1, dl),
  2978. S->getI64Imm(63, dl)), 0);
  2979. }
  2980. std::swap(LHS, RHS);
  2981. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  2982. IsRHSZero = RHSConst && RHSConst->isNullValue();
  2983. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  2984. LLVM_FALLTHROUGH;
  2985. }
  2986. case ISD::SETLT: {
  2987. // {subc.reg, subc.CA} = (subcarry %a, %b)
  2988. // (zext (setcc %a, %b, setlt)) ->
  2989. // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
  2990. // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
  2991. if (IsRHSOne)
  2992. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  2993. if (IsRHSZero)
  2994. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
  2995. S->getI64Imm(1, dl),
  2996. S->getI64Imm(63, dl)), 0);
  2997. SDValue SRADINode =
  2998. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  2999. LHS, S->getI64Imm(63, dl)), 0);
  3000. SDValue SRDINode =
  3001. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  3002. RHS, S->getI64Imm(1, dl),
  3003. S->getI64Imm(63, dl)), 0);
  3004. SDValue SUBFC8Carry =
  3005. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3006. RHS, LHS), 1);
  3007. SDValue ADDE8Node =
  3008. SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
  3009. SRDINode, SRADINode, SUBFC8Carry), 0);
  3010. return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
  3011. ADDE8Node, S->getI64Imm(1, dl)), 0);
  3012. }
  3013. case ISD::SETUGE:
  3014. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3015. // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
  3016. std::swap(LHS, RHS);
  3017. LLVM_FALLTHROUGH;
  3018. case ISD::SETULE: {
  3019. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3020. // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
  3021. SDValue SUBFC8Carry =
  3022. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3023. LHS, RHS), 1);
  3024. SDValue SUBFE8Node =
  3025. SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
  3026. LHS, LHS, SUBFC8Carry), 0);
  3027. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
  3028. SUBFE8Node, S->getI64Imm(1, dl)), 0);
  3029. }
  3030. case ISD::SETUGT:
  3031. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3032. // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
  3033. std::swap(LHS, RHS);
  3034. LLVM_FALLTHROUGH;
  3035. case ISD::SETULT: {
  3036. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3037. // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
  3038. SDValue SubtractCarry =
  3039. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3040. RHS, LHS), 1);
  3041. SDValue ExtSub =
  3042. SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
  3043. LHS, LHS, SubtractCarry), 0);
  3044. return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
  3045. ExtSub), 0);
  3046. }
  3047. }
  3048. }
  3049. /// Produces a sign-extended result of comparing two 64-bit values according to
  3050. /// the passed condition code.
  3051. SDValue
  3052. IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
  3053. ISD::CondCode CC,
  3054. int64_t RHSValue, SDLoc dl) {
  3055. if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
  3056. CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
  3057. return SDValue();
  3058. bool IsRHSZero = RHSValue == 0;
  3059. bool IsRHSOne = RHSValue == 1;
  3060. bool IsRHSNegOne = RHSValue == -1LL;
  3061. switch (CC) {
  3062. default: return SDValue();
  3063. case ISD::SETEQ: {
  3064. // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
  3065. // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
  3066. // {addcz.reg, addcz.CA} = (addcarry %a, -1)
  3067. // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
  3068. SDValue AddInput = IsRHSZero ? LHS :
  3069. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  3070. SDValue Addic =
  3071. SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
  3072. AddInput, S->getI32Imm(~0U, dl)), 0);
  3073. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
  3074. Addic, Addic.getValue(1)), 0);
  3075. }
  3076. case ISD::SETNE: {
  3077. // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
  3078. // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
  3079. // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
  3080. // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
  3081. SDValue Xor = IsRHSZero ? LHS :
  3082. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  3083. SDValue SC =
  3084. SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
  3085. Xor, S->getI32Imm(0, dl)), 0);
  3086. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
  3087. SC, SC.getValue(1)), 0);
  3088. }
  3089. case ISD::SETGE: {
  3090. // {subc.reg, subc.CA} = (subcarry %a, %b)
3091. // (sext (setcc %a, %b, setge)) ->
3092. // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3093. // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
  3094. if (IsRHSZero)
  3095. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  3096. std::swap(LHS, RHS);
  3097. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3098. IsRHSZero = RHSConst && RHSConst->isNullValue();
  3099. LLVM_FALLTHROUGH;
  3100. }
  3101. case ISD::SETLE: {
  3102. // {subc.reg, subc.CA} = (subcarry %b, %a)
3103. // (sext (setcc %a, %b, setle)) ->
3104. // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3105. // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
  3106. if (IsRHSZero)
  3107. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  3108. SDValue ShiftR =
  3109. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
  3110. S->getI64Imm(63, dl)), 0);
  3111. SDValue ShiftL =
  3112. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
  3113. S->getI64Imm(1, dl),
  3114. S->getI64Imm(63, dl)), 0);
  3115. SDValue SubtractCarry =
  3116. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3117. LHS, RHS), 1);
  3118. SDValue Adde =
  3119. SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
  3120. ShiftR, ShiftL, SubtractCarry), 0);
  3121. return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
  3122. }
  3123. case ISD::SETGT: {
  3124. // {subc.reg, subc.CA} = (subcarry %b, %a)
3125. // (sext (setcc %a, %b, setgt)) ->
3126. // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3127. // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
  3128. if (IsRHSNegOne)
  3129. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  3130. if (IsRHSZero) {
  3131. SDValue Add =
  3132. SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
  3133. S->getI64Imm(-1, dl)), 0);
  3134. SDValue Nor =
  3135. SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
  3136. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
  3137. S->getI64Imm(63, dl)), 0);
  3138. }
  3139. std::swap(LHS, RHS);
  3140. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3141. IsRHSZero = RHSConst && RHSConst->isNullValue();
  3142. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  3143. LLVM_FALLTHROUGH;
  3144. }
  3145. case ISD::SETLT: {
  3146. // {subc.reg, subc.CA} = (subcarry %a, %b)
3147. // (sext (setcc %a, %b, setlt)) ->
3148. // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3149. // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
  3150. if (IsRHSOne)
  3151. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  3152. if (IsRHSZero) {
  3153. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
  3154. S->getI64Imm(63, dl)), 0);
  3155. }
  3156. SDValue SRADINode =
  3157. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  3158. LHS, S->getI64Imm(63, dl)), 0);
  3159. SDValue SRDINode =
  3160. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  3161. RHS, S->getI64Imm(1, dl),
  3162. S->getI64Imm(63, dl)), 0);
  3163. SDValue SUBFC8Carry =
  3164. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3165. RHS, LHS), 1);
  3166. SDValue ADDE8Node =
  3167. SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
  3168. SRDINode, SRADINode, SUBFC8Carry), 0);
  3169. SDValue XORI8Node =
  3170. SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
  3171. ADDE8Node, S->getI64Imm(1, dl)), 0);
  3172. return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
  3173. XORI8Node), 0);
  3174. }
  3175. case ISD::SETUGE:
  3176. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3177. // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
  3178. std::swap(LHS, RHS);
  3179. LLVM_FALLTHROUGH;
  3180. case ISD::SETULE: {
  3181. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3182. // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
  3183. SDValue SubtractCarry =
  3184. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3185. LHS, RHS), 1);
  3186. SDValue ExtSub =
  3187. SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
  3188. LHS, SubtractCarry), 0);
  3189. return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
  3190. ExtSub, ExtSub), 0);
  3191. }
  3192. case ISD::SETUGT:
  3193. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3194. // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
  3195. std::swap(LHS, RHS);
  3196. LLVM_FALLTHROUGH;
  3197. case ISD::SETULT: {
  3198. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3199. // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
  3200. SDValue SubCarry =
  3201. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3202. RHS, LHS), 1);
  3203. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
  3204. LHS, LHS, SubCarry), 0);
  3205. }
  3206. }
  3207. }
  3208. /// Do all uses of this SDValue need the result in a GPR?
  3209. /// This is meant to be used on values that have type i1 since
  3210. /// it is somewhat meaningless to ask if values of other types
3211. /// should be kept in GPRs.
  3212. static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
  3213. assert(Compare.getOpcode() == ISD::SETCC &&
  3214. "An ISD::SETCC node required here.");
  3215. // For values that have a single use, the caller should obviously already have
  3216. // checked if that use is an extending use. We check the other uses here.
  3217. if (Compare.hasOneUse())
  3218. return true;
  3219. // We want the value in a GPR if it is being extended, used for a select, or
  3220. // used in logical operations.
  3221. for (auto CompareUse : Compare.getNode()->uses())
  3222. if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
  3223. CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
  3224. CompareUse->getOpcode() != ISD::SELECT &&
  3225. !isLogicOp(CompareUse->getOpcode())) {
  3226. OmittedForNonExtendUses++;
  3227. return false;
  3228. }
  3229. return true;
  3230. }
  3231. /// Returns an equivalent of a SETCC node but with the result the same width as
  3232. /// the inputs. This can also be used for SELECT_CC if either the true or false
3233. /// value is a power of two while the other is zero.
  3234. SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
  3235. SetccInGPROpts ConvOpts) {
  3236. assert((Compare.getOpcode() == ISD::SETCC ||
  3237. Compare.getOpcode() == ISD::SELECT_CC) &&
  3238. "An ISD::SETCC node required here.");
  3239. // Don't convert this comparison to a GPR sequence because there are uses
  3240. // of the i1 result (i.e. uses that require the result in the CR).
  3241. if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
  3242. return SDValue();
  3243. SDValue LHS = Compare.getOperand(0);
  3244. SDValue RHS = Compare.getOperand(1);
  3245. // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
  3246. int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
  3247. ISD::CondCode CC =
  3248. cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
  3249. EVT InputVT = LHS.getValueType();
  3250. if (InputVT != MVT::i32 && InputVT != MVT::i64)
  3251. return SDValue();
  3252. if (ConvOpts == SetccInGPROpts::ZExtInvert ||
  3253. ConvOpts == SetccInGPROpts::SExtInvert)
  3254. CC = ISD::getSetCCInverse(CC, InputVT);
  3255. bool Inputs32Bit = InputVT == MVT::i32;
  3256. SDLoc dl(Compare);
  3257. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3258. int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
  3259. bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
  3260. ConvOpts == SetccInGPROpts::SExtInvert;
  3261. if (IsSext && Inputs32Bit)
  3262. return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  3263. else if (Inputs32Bit)
  3264. return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
  3265. else if (IsSext)
  3266. return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  3267. return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
  3268. }
  3269. } // end anonymous namespace
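// Try to select extensions of (and logical operations on) i1 comparison
// results as GPR-only instruction sequences via IntegerCompareEliminator,
// avoiding any use of a condition register field.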
  3270. bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
  3271. if (N->getValueType(0) != MVT::i32 &&
  3272. N->getValueType(0) != MVT::i64)
  3273. return false;
  3274. // This optimization will emit code that assumes 64-bit registers
  3275. // so we don't want to run it in 32-bit mode. Also don't run it
  3276. // on functions that are not to be optimized.
  3277. if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
  3278. return false;
  3279. // For POWER10, it is more profitable to use the set boolean extension
  3280. // instructions rather than the integer compare elimination codegen.
  3281. // Users can override this via the command line option, `--ppc-gpr-icmps`.
  3282. if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
  3283. return false;
  3284. switch (N->getOpcode()) {
  3285. default: break;
  3286. case ISD::ZERO_EXTEND:
  3287. case ISD::SIGN_EXTEND:
  3288. case ISD::AND:
  3289. case ISD::OR:
  3290. case ISD::XOR: {
  3291. IntegerCompareEliminator ICmpElim(CurDAG, this);
  3292. if (SDNode *New = ICmpElim.Select(N)) {
  3293. ReplaceNode(N, New);
  3294. return true;
  3295. }
  3296. }
  3297. }
  3298. return false;
  3299. }
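// Try to match a tree of rotates, shifts, ANDs and ORs with the
// BitPermutationSelector and, if successful, replace the node with the
// machine-node sequence it selects.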
  3300. bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
  3301. if (N->getValueType(0) != MVT::i32 &&
  3302. N->getValueType(0) != MVT::i64)
  3303. return false;
  3304. if (!UseBitPermRewriter)
  3305. return false;
  3306. switch (N->getOpcode()) {
  3307. default: break;
  3308. case ISD::ROTL:
  3309. case ISD::SHL:
  3310. case ISD::SRL:
  3311. case ISD::AND:
  3312. case ISD::OR: {
  3313. BitPermutationSelector BPS(CurDAG);
  3314. if (SDNode *New = BPS.Select(N)) {
  3315. ReplaceNode(N, New);
  3316. return true;
  3317. }
  3318. return false;
  3319. }
  3320. }
  3321. return false;
  3322. }
  3323. /// SelectCC - Select a comparison of the specified values with the specified
  3324. /// condition code, returning the CR# of the expression.
  3325. SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  3326. const SDLoc &dl, SDValue Chain) {
  3327. // Always select the LHS.
  3328. unsigned Opc;
  3329. if (LHS.getValueType() == MVT::i32) {
  3330. unsigned Imm;
  3331. if (CC == ISD::SETEQ || CC == ISD::SETNE) {
  3332. if (isInt32Immediate(RHS, Imm)) {
  3333. // SETEQ/SETNE comparison with 16-bit immediate, fold it.
  3334. if (isUInt<16>(Imm))
  3335. return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
  3336. getI32Imm(Imm & 0xFFFF, dl)),
  3337. 0);
  3338. // If this is a 16-bit signed immediate, fold it.
  3339. if (isInt<16>((int)Imm))
  3340. return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
  3341. getI32Imm(Imm & 0xFFFF, dl)),
  3342. 0);
  3343. // For non-equality comparisons, the default code would materialize the
  3344. // constant, then compare against it, like this:
  3345. // lis r2, 4660
  3346. // ori r2, r2, 22136
  3347. // cmpw cr0, r3, r2
  3348. // Since we are just comparing for equality, we can emit this instead:
  3349. // xoris r0,r3,0x1234
  3350. // cmplwi cr0,r0,0x5678
  3351. // beq cr0,L6
  3352. SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
  3353. getI32Imm(Imm >> 16, dl)), 0);
  3354. return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
  3355. getI32Imm(Imm & 0xFFFF, dl)), 0);
  3356. }
  3357. Opc = PPC::CMPLW;
  3358. } else if (ISD::isUnsignedIntSetCC(CC)) {
  3359. if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
  3360. return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
  3361. getI32Imm(Imm & 0xFFFF, dl)), 0);
  3362. Opc = PPC::CMPLW;
  3363. } else {
  3364. int16_t SImm;
  3365. if (isIntS16Immediate(RHS, SImm))
  3366. return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
  3367. getI32Imm((int)SImm & 0xFFFF,
  3368. dl)),
  3369. 0);
  3370. Opc = PPC::CMPW;
  3371. }
  3372. } else if (LHS.getValueType() == MVT::i64) {
  3373. uint64_t Imm;
  3374. if (CC == ISD::SETEQ || CC == ISD::SETNE) {
  3375. if (isInt64Immediate(RHS.getNode(), Imm)) {
  3376. // SETEQ/SETNE comparison with 16-bit immediate, fold it.
  3377. if (isUInt<16>(Imm))
  3378. return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
  3379. getI32Imm(Imm & 0xFFFF, dl)),
  3380. 0);
  3381. // If this is a 16-bit signed immediate, fold it.
  3382. if (isInt<16>(Imm))
  3383. return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
  3384. getI32Imm(Imm & 0xFFFF, dl)),
  3385. 0);
  3386. // For non-equality comparisons, the default code would materialize the
  3387. // constant, then compare against it, like this:
  3388. // lis r2, 4660
  3389. // ori r2, r2, 22136
  3390. // cmpd cr0, r3, r2
  3391. // Since we are just comparing for equality, we can emit this instead:
  3392. // xoris r0,r3,0x1234
  3393. // cmpldi cr0,r0,0x5678
  3394. // beq cr0,L6
  3395. if (isUInt<32>(Imm)) {
  3396. SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
  3397. getI64Imm(Imm >> 16, dl)), 0);
  3398. return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
  3399. getI64Imm(Imm & 0xFFFF, dl)),
  3400. 0);
  3401. }
  3402. }
  3403. Opc = PPC::CMPLD;
  3404. } else if (ISD::isUnsignedIntSetCC(CC)) {
  3405. if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
  3406. return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
  3407. getI64Imm(Imm & 0xFFFF, dl)), 0);
  3408. Opc = PPC::CMPLD;
  3409. } else {
  3410. int16_t SImm;
  3411. if (isIntS16Immediate(RHS, SImm))
  3412. return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
  3413. getI64Imm(SImm & 0xFFFF, dl)),
  3414. 0);
  3415. Opc = PPC::CMPD;
  3416. }
  3417. } else if (LHS.getValueType() == MVT::f32) {
  3418. if (Subtarget->hasSPE()) {
  3419. switch (CC) {
  3420. default:
  3421. case ISD::SETEQ:
  3422. case ISD::SETNE:
  3423. Opc = PPC::EFSCMPEQ;
  3424. break;
  3425. case ISD::SETLT:
  3426. case ISD::SETGE:
  3427. case ISD::SETOLT:
  3428. case ISD::SETOGE:
  3429. case ISD::SETULT:
  3430. case ISD::SETUGE:
  3431. Opc = PPC::EFSCMPLT;
  3432. break;
  3433. case ISD::SETGT:
  3434. case ISD::SETLE:
  3435. case ISD::SETOGT:
  3436. case ISD::SETOLE:
  3437. case ISD::SETUGT:
  3438. case ISD::SETULE:
  3439. Opc = PPC::EFSCMPGT;
  3440. break;
  3441. }
  3442. } else
  3443. Opc = PPC::FCMPUS;
  3444. } else if (LHS.getValueType() == MVT::f64) {
  3445. if (Subtarget->hasSPE()) {
  3446. switch (CC) {
  3447. default:
  3448. case ISD::SETEQ:
  3449. case ISD::SETNE:
  3450. Opc = PPC::EFDCMPEQ;
  3451. break;
  3452. case ISD::SETLT:
  3453. case ISD::SETGE:
  3454. case ISD::SETOLT:
  3455. case ISD::SETOGE:
  3456. case ISD::SETULT:
  3457. case ISD::SETUGE:
  3458. Opc = PPC::EFDCMPLT;
  3459. break;
  3460. case ISD::SETGT:
  3461. case ISD::SETLE:
  3462. case ISD::SETOGT:
  3463. case ISD::SETOLE:
  3464. case ISD::SETUGT:
  3465. case ISD::SETULE:
  3466. Opc = PPC::EFDCMPGT;
  3467. break;
  3468. }
  3469. } else
  3470. Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
  3471. } else {
  3472. assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
  3473. assert(Subtarget->hasVSX() && "__float128 requires VSX");
  3474. Opc = PPC::XSCMPUQP;
  3475. }
  3476. if (Chain)
  3477. return SDValue(
  3478. CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
  3479. 0);
  3480. else
  3481. return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
  3482. }
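// getPredicateForSetCC - Map an ISD condition code to the PPC branch
// predicate to test once the comparison result is in a CR field. For SPE
// floating-point compares only the GT bit is set, so several condition
// codes map to GT/LE rather than their usual predicates.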
  3483. static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
  3484. const PPCSubtarget *Subtarget) {
3485. // For SPE instructions, the result is in the GT bit of the CR.
  3486. bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
  3487. switch (CC) {
  3488. case ISD::SETUEQ:
  3489. case ISD::SETONE:
  3490. case ISD::SETOLE:
  3491. case ISD::SETOGE:
  3492. llvm_unreachable("Should be lowered by legalize!");
  3493. default: llvm_unreachable("Unknown condition!");
  3494. case ISD::SETOEQ:
  3495. case ISD::SETEQ:
  3496. return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
  3497. case ISD::SETUNE:
  3498. case ISD::SETNE:
  3499. return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
  3500. case ISD::SETOLT:
  3501. case ISD::SETLT:
  3502. return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
  3503. case ISD::SETULE:
  3504. case ISD::SETLE:
  3505. return PPC::PRED_LE;
  3506. case ISD::SETOGT:
  3507. case ISD::SETGT:
  3508. return PPC::PRED_GT;
  3509. case ISD::SETUGE:
  3510. case ISD::SETGE:
  3511. return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
  3512. case ISD::SETO: return PPC::PRED_NU;
  3513. case ISD::SETUO: return PPC::PRED_UN;
  3514. // These two are invalid for floating point. Assume we have int.
  3515. case ISD::SETULT: return PPC::PRED_LT;
  3516. case ISD::SETUGT: return PPC::PRED_GT;
  3517. }
  3518. }
  3519. /// getCRIdxForSetCC - Return the index of the condition register field
  3520. /// associated with the SetCC condition, and whether or not the field is
  3521. /// treated as inverted. That is, lt = 0; ge = 0 inverted.
  3522. static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
  3523. Invert = false;
  3524. switch (CC) {
  3525. default: llvm_unreachable("Unknown condition!");
  3526. case ISD::SETOLT:
  3527. case ISD::SETLT: return 0; // Bit #0 = SETOLT
  3528. case ISD::SETOGT:
  3529. case ISD::SETGT: return 1; // Bit #1 = SETOGT
  3530. case ISD::SETOEQ:
  3531. case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
  3532. case ISD::SETUO: return 3; // Bit #3 = SETUO
  3533. case ISD::SETUGE:
  3534. case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
  3535. case ISD::SETULE:
  3536. case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
  3537. case ISD::SETUNE:
  3538. case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
  3539. case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
  3540. case ISD::SETUEQ:
  3541. case ISD::SETOGE:
  3542. case ISD::SETOLE:
  3543. case ISD::SETONE:
  3544. llvm_unreachable("Invalid branch code: should be expanded by legalize");
  3545. // These are invalid for floating point. Assume integer.
  3546. case ISD::SETULT: return 0;
  3547. case ISD::SETUGT: return 1;
  3548. }
  3549. }
  3550. // getVCmpInst: return the vector compare instruction for the specified
3551. // vector type and condition code. Since this is for Altivec-specific code,
3552. // we only support the Altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
  3553. // and v4f32).
  3554. static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
  3555. bool HasVSX, bool &Swap, bool &Negate) {
  3556. Swap = false;
  3557. Negate = false;
  3558. if (VecVT.isFloatingPoint()) {
  3559. /* Handle some cases by swapping input operands. */
  3560. switch (CC) {
  3561. case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
  3562. case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
  3563. case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
  3564. case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
  3565. case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
  3566. case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
  3567. default: break;
  3568. }
  3569. /* Handle some cases by negating the result. */
  3570. switch (CC) {
  3571. case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
  3572. case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
  3573. case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
  3574. case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
  3575. default: break;
  3576. }
  3577. /* We have instructions implementing the remaining cases. */
  3578. switch (CC) {
  3579. case ISD::SETEQ:
  3580. case ISD::SETOEQ:
  3581. if (VecVT == MVT::v4f32)
  3582. return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
  3583. else if (VecVT == MVT::v2f64)
  3584. return PPC::XVCMPEQDP;
  3585. break;
  3586. case ISD::SETGT:
  3587. case ISD::SETOGT:
  3588. if (VecVT == MVT::v4f32)
  3589. return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
  3590. else if (VecVT == MVT::v2f64)
  3591. return PPC::XVCMPGTDP;
  3592. break;
  3593. case ISD::SETGE:
  3594. case ISD::SETOGE:
  3595. if (VecVT == MVT::v4f32)
  3596. return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
  3597. else if (VecVT == MVT::v2f64)
  3598. return PPC::XVCMPGEDP;
  3599. break;
  3600. default:
  3601. break;
  3602. }
  3603. llvm_unreachable("Invalid floating-point vector compare condition");
  3604. } else {
  3605. /* Handle some cases by swapping input operands. */
  3606. switch (CC) {
  3607. case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
  3608. case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
  3609. case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
  3610. case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
  3611. default: break;
  3612. }
  3613. /* Handle some cases by negating the result. */
  3614. switch (CC) {
  3615. case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
  3616. case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
  3617. case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
  3618. case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
  3619. default: break;
  3620. }
  3621. /* We have instructions implementing the remaining cases. */
  3622. switch (CC) {
  3623. case ISD::SETEQ:
  3624. case ISD::SETUEQ:
  3625. if (VecVT == MVT::v16i8)
  3626. return PPC::VCMPEQUB;
  3627. else if (VecVT == MVT::v8i16)
  3628. return PPC::VCMPEQUH;
  3629. else if (VecVT == MVT::v4i32)
  3630. return PPC::VCMPEQUW;
  3631. else if (VecVT == MVT::v2i64)
  3632. return PPC::VCMPEQUD;
  3633. else if (VecVT == MVT::v1i128)
  3634. return PPC::VCMPEQUQ;
  3635. break;
  3636. case ISD::SETGT:
  3637. if (VecVT == MVT::v16i8)
  3638. return PPC::VCMPGTSB;
  3639. else if (VecVT == MVT::v8i16)
  3640. return PPC::VCMPGTSH;
  3641. else if (VecVT == MVT::v4i32)
  3642. return PPC::VCMPGTSW;
  3643. else if (VecVT == MVT::v2i64)
  3644. return PPC::VCMPGTSD;
  3645. else if (VecVT == MVT::v1i128)
  3646. return PPC::VCMPGTSQ;
  3647. break;
  3648. case ISD::SETUGT:
  3649. if (VecVT == MVT::v16i8)
  3650. return PPC::VCMPGTUB;
  3651. else if (VecVT == MVT::v8i16)
  3652. return PPC::VCMPGTUH;
  3653. else if (VecVT == MVT::v4i32)
  3654. return PPC::VCMPGTUW;
  3655. else if (VecVT == MVT::v2i64)
  3656. return PPC::VCMPGTUD;
  3657. else if (VecVT == MVT::v1i128)
  3658. return PPC::VCMPGTUQ;
  3659. break;
  3660. default:
  3661. break;
  3662. }
  3663. llvm_unreachable("Invalid integer vector compare condition");
  3664. }
  3665. }
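// trySETCC - Try to select a SETCC (or strict FP setcc) node: compares
// against 0/-1 are handled with short GPR-only idioms, vector compares map
// directly to Altivec/VSX compare instructions, and the remaining cases move
// the CR field into a GPR with MFOCRF and isolate the relevant bit with
// RLWINM (XORI-ing it when the bit is treated as inverted).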
  3666. bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  3667. SDLoc dl(N);
  3668. unsigned Imm;
  3669. bool IsStrict = N->isStrictFPOpcode();
  3670. ISD::CondCode CC =
  3671. cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
  3672. EVT PtrVT =
  3673. CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  3674. bool isPPC64 = (PtrVT == MVT::i64);
  3675. SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
  3676. SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
  3677. SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
  3678. if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
  3679. // We can codegen setcc op, imm very efficiently compared to a brcond.
  3680. // Check for those cases here.
  3681. // setcc op, 0
  3682. if (Imm == 0) {
  3683. SDValue Op = LHS;
  3684. switch (CC) {
  3685. default: break;
  3686. case ISD::SETEQ: {
  3687. Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
  3688. SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
  3689. getI32Imm(31, dl) };
  3690. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3691. return true;
  3692. }
  3693. case ISD::SETNE: {
  3694. if (isPPC64) break;
  3695. SDValue AD =
  3696. SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  3697. Op, getI32Imm(~0U, dl)), 0);
  3698. CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
  3699. return true;
  3700. }
  3701. case ISD::SETLT: {
  3702. SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
  3703. getI32Imm(31, dl) };
  3704. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3705. return true;
  3706. }
  3707. case ISD::SETGT: {
  3708. SDValue T =
  3709. SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
  3710. T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
  3711. SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
  3712. getI32Imm(31, dl) };
  3713. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3714. return true;
  3715. }
  3716. }
  3717. } else if (Imm == ~0U) { // setcc op, -1
  3718. SDValue Op = LHS;
  3719. switch (CC) {
  3720. default: break;
  3721. case ISD::SETEQ:
  3722. if (isPPC64) break;
  3723. Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  3724. Op, getI32Imm(1, dl)), 0);
  3725. CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
  3726. SDValue(CurDAG->getMachineNode(PPC::LI, dl,
  3727. MVT::i32,
  3728. getI32Imm(0, dl)),
  3729. 0), Op.getValue(1));
  3730. return true;
  3731. case ISD::SETNE: {
  3732. if (isPPC64) break;
  3733. Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
  3734. SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  3735. Op, getI32Imm(~0U, dl));
  3736. CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
  3737. SDValue(AD, 1));
  3738. return true;
  3739. }
  3740. case ISD::SETLT: {
  3741. SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
  3742. getI32Imm(1, dl)), 0);
  3743. SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
  3744. Op), 0);
  3745. SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
  3746. getI32Imm(31, dl) };
  3747. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3748. return true;
  3749. }
  3750. case ISD::SETGT: {
  3751. SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
  3752. getI32Imm(31, dl) };
  3753. Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  3754. CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
  3755. return true;
  3756. }
  3757. }
  3758. }
  3759. }
3760. // Altivec vector compare instructions do not set any CR register by default,
3761. // and vector compare operations return the same type as the operands.
  3762. if (!IsStrict && LHS.getValueType().isVector()) {
  3763. if (Subtarget->hasSPE())
  3764. return false;
  3765. EVT VecVT = LHS.getValueType();
  3766. bool Swap, Negate;
  3767. unsigned int VCmpInst =
  3768. getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
  3769. if (Swap)
  3770. std::swap(LHS, RHS);
  3771. EVT ResVT = VecVT.changeVectorElementTypeToInteger();
  3772. if (Negate) {
  3773. SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
  3774. CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
  3775. ResVT, VCmp, VCmp);
  3776. return true;
  3777. }
  3778. CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
  3779. return true;
  3780. }
  3781. if (Subtarget->useCRBits())
  3782. return false;
  3783. bool Inv;
  3784. unsigned Idx = getCRIdxForSetCC(CC, Inv);
  3785. SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
  3786. if (IsStrict)
  3787. CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
  3788. SDValue IntCR;
3789. // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that here.
3790. // The correct compare instruction has already been chosen by SelectCC().
  3791. if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
  3792. Idx = 1;
  3793. }
  3794. // Force the ccreg into CR7.
  3795. SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
  3796. SDValue InFlag(nullptr, 0); // Null incoming flag value.
  3797. CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
  3798. InFlag).getValue(1);
  3799. IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
  3800. CCReg), 0);
  3801. SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
  3802. getI32Imm(31, dl), getI32Imm(31, dl) };
  3803. if (!Inv) {
  3804. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3805. return true;
  3806. }
  3807. // Get the specified bit.
  3808. SDValue Tmp =
  3809. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  3810. CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
  3811. return true;
  3812. }
  3813. /// Does this node represent a load/store node whose address can be represented
3814. /// with a register plus an immediate that's a multiple of \p Val?
  3815. bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
  3816. LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
  3817. StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
  3818. SDValue AddrOp;
  3819. if (LDN)
  3820. AddrOp = LDN->getOperand(1);
  3821. else if (STN)
  3822. AddrOp = STN->getOperand(2);
3823. // If the address points to a frame object, or to a frame object plus an offset,
  3824. // we need to check the object alignment.
  3825. short Imm = 0;
  3826. if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
  3827. AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
  3828. AddrOp)) {
  3829. // If op0 is a frame index that is under aligned, we can't do it either,
  3830. // because it is translated to r31 or r1 + slot + offset. We won't know the
  3831. // slot number until the stack frame is finalized.
  3832. const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
  3833. unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
  3834. if ((SlotAlign % Val) != 0)
  3835. return false;
  3836. // If we have an offset, we need further check on the offset.
  3837. if (AddrOp.getOpcode() != ISD::ADD)
  3838. return true;
  3839. }
  3840. if (AddrOp.getOpcode() == ISD::ADD)
  3841. return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
  3842. // If the address comes from the outside, the offset will be zero.
  3843. return AddrOp.getOpcode() == ISD::CopyFromReg;
  3844. }
  3845. void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  3846. // Transfer memoperands.
  3847. MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  3848. CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
  3849. }
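// Check whether this SELECT_CC can be lowered to the POWER9 SETB instruction,
// which produces -1/0/1 directly from a comparison. On success, NeedSwapOps
// says whether the compared operands must be swapped and IsUnCmp says whether
// an unsigned comparison is needed.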
  3850. static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
  3851. bool &NeedSwapOps, bool &IsUnCmp) {
  3852. assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
  3853. SDValue LHS = N->getOperand(0);
  3854. SDValue RHS = N->getOperand(1);
  3855. SDValue TrueRes = N->getOperand(2);
  3856. SDValue FalseRes = N->getOperand(3);
  3857. ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
  3858. if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
  3859. N->getSimpleValueType(0) != MVT::i32))
  3860. return false;
  3861. // We are looking for any of:
  3862. // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  3863. // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  3864. // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
  3865. // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
  3866. int64_t TrueResVal = TrueConst->getSExtValue();
  3867. if ((TrueResVal < -1 || TrueResVal > 1) ||
  3868. (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
  3869. (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
  3870. (TrueResVal == 0 &&
  3871. (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
  3872. return false;
  3873. SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
  3874. ? FalseRes
  3875. : FalseRes.getOperand(0);
  3876. bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
  3877. if (SetOrSelCC.getOpcode() != ISD::SETCC &&
  3878. SetOrSelCC.getOpcode() != ISD::SELECT_CC)
  3879. return false;
3880. // Without this setb optimization, the outer SELECT_CC is selected to a
3881. // SELECT_CC_I4/SELECT_CC_I8 pseudo, which the expand-isel-pseudos pass then
3882. // turns into an isel instruction. When the result has more than one use
3883. // (e.g. both a zext and a sext), this optimization would merely replace the
3884. // isel with a setb without any significant gain; since setb has a longer
3885. // latency than isel, we should avoid that. Furthermore, setb requires the
3886. // comparison to be kept alive, which could block a future opportunity to
3887. // eliminate the comparison altogether.
  3888. if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
  3889. return false;
  3890. SDValue InnerLHS = SetOrSelCC.getOperand(0);
  3891. SDValue InnerRHS = SetOrSelCC.getOperand(1);
  3892. ISD::CondCode InnerCC =
  3893. cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
  3894. // If the inner comparison is a select_cc, make sure the true/false values are
  3895. // 1/-1 and canonicalize it if needed.
  3896. if (InnerIsSel) {
  3897. ConstantSDNode *SelCCTrueConst =
  3898. dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
  3899. ConstantSDNode *SelCCFalseConst =
  3900. dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
  3901. if (!SelCCTrueConst || !SelCCFalseConst)
  3902. return false;
  3903. int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
  3904. int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
  3905. // The values must be -1/1 (requiring a swap) or 1/-1.
  3906. if (SelCCTVal == -1 && SelCCFVal == 1) {
  3907. std::swap(InnerLHS, InnerRHS);
  3908. } else if (SelCCTVal != 1 || SelCCFVal != -1)
  3909. return false;
  3910. }
  3911. // Canonicalize unsigned case
  3912. if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
  3913. IsUnCmp = true;
  3914. InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
  3915. }
  3916. bool InnerSwapped = false;
  3917. if (LHS == InnerRHS && RHS == InnerLHS)
  3918. InnerSwapped = true;
  3919. else if (LHS != InnerLHS || RHS != InnerRHS)
  3920. return false;
  3921. switch (CC) {
  3922. // (select_cc lhs, rhs, 0, \
  3923. // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
  3924. case ISD::SETEQ:
  3925. if (!InnerIsSel)
  3926. return false;
  3927. if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
  3928. return false;
  3929. NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
  3930. break;
  3931. // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  3932. // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
  3933. // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
  3934. // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  3935. // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
  3936. // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
  3937. case ISD::SETULT:
  3938. if (!IsUnCmp && InnerCC != ISD::SETNE)
  3939. return false;
  3940. IsUnCmp = true;
  3941. LLVM_FALLTHROUGH;
  3942. case ISD::SETLT:
  3943. if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
  3944. (InnerCC == ISD::SETLT && InnerSwapped))
  3945. NeedSwapOps = (TrueResVal == 1);
  3946. else
  3947. return false;
  3948. break;
  3949. // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  3950. // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
  3951. // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
  3952. // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  3953. // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
  3954. // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
  3955. case ISD::SETUGT:
  3956. if (!IsUnCmp && InnerCC != ISD::SETNE)
  3957. return false;
  3958. IsUnCmp = true;
  3959. LLVM_FALLTHROUGH;
  3960. case ISD::SETGT:
  3961. if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
  3962. (InnerCC == ISD::SETGT && InnerSwapped))
  3963. NeedSwapOps = (TrueResVal == -1);
  3964. else
  3965. return false;
  3966. break;
  3967. default:
  3968. return false;
  3969. }
  3970. LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
  3971. LLVM_DEBUG(N->dump());
  3972. return true;
  3973. }
  3974. // Return true if it's a software square-root/divide operand.
  3975. static bool isSWTestOp(SDValue N) {
  3976. if (N.getOpcode() == PPCISD::FTSQRT)
  3977. return true;
  3978. if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))
  3979. return false;
  3980. switch (N.getConstantOperandVal(0)) {
  3981. case Intrinsic::ppc_vsx_xvtdivdp:
  3982. case Intrinsic::ppc_vsx_xvtdivsp:
  3983. case Intrinsic::ppc_vsx_xvtsqrtdp:
  3984. case Intrinsic::ppc_vsx_xvtsqrtsp:
  3985. return true;
  3986. }
  3987. return false;
  3988. }
  3989. bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
  3990. assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
3991. // We are looking for the following patterns, where `truncate to i1` has the
3992. // same semantics as `and 1`.
  3993. // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
  3994. // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
  3995. // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
  3996. // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
  3997. // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
  3998. // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
  3999. // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
  4000. // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
  4001. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  4002. if (CC != ISD::SETEQ && CC != ISD::SETNE)
  4003. return false;
  4004. SDValue CmpRHS = N->getOperand(3);
  4005. if (!isa<ConstantSDNode>(CmpRHS) ||
  4006. cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
  4007. return false;
  4008. SDValue CmpLHS = N->getOperand(2);
  4009. if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
  4010. return false;
  4011. unsigned PCC = 0;
  4012. bool IsCCNE = CC == ISD::SETNE;
  4013. if (CmpLHS.getOpcode() == ISD::AND &&
  4014. isa<ConstantSDNode>(CmpLHS.getOperand(1)))
  4015. switch (CmpLHS.getConstantOperandVal(1)) {
  4016. case 1:
  4017. PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
  4018. break;
  4019. case 2:
  4020. PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
  4021. break;
  4022. case 4:
  4023. PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
  4024. break;
  4025. case 8:
  4026. PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
  4027. break;
  4028. default:
  4029. return false;
  4030. }
  4031. else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
  4032. CmpLHS.getValueType() == MVT::i1)
  4033. PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
  4034. if (PCC) {
  4035. SDLoc dl(N);
  4036. SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
  4037. N->getOperand(0)};
  4038. CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
  4039. return true;
  4040. }
  4041. return false;
  4042. }
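// tryAsSingleRLWINM - Try to select this AND-with-32-bit-immediate as a
// single RLWINM (rotate left word immediate then AND with mask) instruction.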
  4043. bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
  4044. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4045. unsigned Imm;
  4046. if (!isInt32Immediate(N->getOperand(1), Imm))
  4047. return false;
  4048. SDLoc dl(N);
  4049. SDValue Val = N->getOperand(0);
  4050. unsigned SH, MB, ME;
  4051. // If this is an and of a value rotated between 0 and 31 bits and then and'd
  4052. // with a mask, emit rlwinm
  4053. if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
  4054. Val = Val.getOperand(0);
  4055. SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
  4056. getI32Imm(ME, dl)};
  4057. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4058. return true;
  4059. }
4060. // If this is just a masked value where the input is not handled above, and
4061. // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm.
  4062. if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
  4063. SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
  4064. getI32Imm(ME, dl)};
  4065. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4066. return true;
  4067. }
  4068. // AND X, 0 -> 0, not "rlwinm 32".
  4069. if (Imm == 0) {
  4070. ReplaceUses(SDValue(N, 0), N->getOperand(1));
  4071. return true;
  4072. }
  4073. return false;
  4074. }
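// tryAsSingleRLWINM8 - Try to select this 64-bit AND-with-immediate as a
// single RLWINM8 when the mask is a run of ones confined to the low 32 bits.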
  4075. bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
  4076. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4077. uint64_t Imm64;
  4078. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
  4079. return false;
  4080. unsigned MB, ME;
  4081. if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
4082. //              MB   ME
4083. // +----------------------+
4084. // |xxxxxxxxxxx00011111000|
4085. // +----------------------+
4086. //  0         32         64
4087. // We can only do this if MB >= 32 and MB <= ME, as RLWINM will replace the
4088. // contents of bits [0, 32) with bits [32, 64) even though we did not
4089. // rotate.
  4090. SDLoc dl(N);
  4091. SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
  4092. getI64Imm(ME - 32, dl)};
  4093. CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
  4094. return true;
  4095. }
  4096. return false;
  4097. }
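// tryAsPairOfRLDICL - Try to select this 64-bit AND-with-immediate as two
// RLDICL instructions when the mask, after filling in its leading zeros,
// forms a wrapped run of ones.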
  4098. bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
  4099. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4100. uint64_t Imm64;
  4101. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
  4102. return false;
4103. // Do nothing if it is a 16-bit imm, as the pattern in the .td file handles
4104. // it well with "andi.".
  4105. if (isUInt<16>(Imm64))
  4106. return false;
  4107. SDLoc Loc(N);
  4108. SDValue Val = N->getOperand(0);
  4109. // Optimized with two rldicl's as follows:
  4110. // Add missing bits on left to the mask and check that the mask is a
  4111. // wrapped run of ones, i.e.
  4112. // Change pattern |0001111100000011111111|
  4113. // to |1111111100000011111111|.
  4114. unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
  4115. if (NumOfLeadingZeros != 0)
  4116. Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
  4117. unsigned MB, ME;
  4118. if (!isRunOfOnes64(Imm64, MB, ME))
  4119. return false;
4120. //        ME     MB                        MB-ME+63
4121. // +----------------------+    +----------------------+
4122. // |1111111100000011111111| -> |0000001111111111111111|
4123. // +----------------------+    +----------------------+
4124. //  0                    63     0                    63
  4125. // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
  4126. unsigned OnesOnLeft = ME + 1;
  4127. unsigned ZerosInBetween = (MB - ME + 63) & 63;
  4128. // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
  4129. // on the left the bits that are already zeros in the mask.
  4130. Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
  4131. getI64Imm(OnesOnLeft, Loc),
  4132. getI64Imm(ZerosInBetween, Loc)),
  4133. 0);
4134. //       MB-ME+63                        ME     MB
4135. // +----------------------+    +----------------------+
4136. // |0000001111111111111111| -> |0001111100000011111111|
4137. // +----------------------+    +----------------------+
4138. //  0                    63     0                    63
  4139. // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
  4140. // left the number of ones we previously added.
  4141. SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
  4142. getI64Imm(NumOfLeadingZeros, Loc)};
  4143. CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  4144. return true;
  4145. }
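// tryAsSingleRLWIMI - Try to select (and (or x, c1), c2) as a single RLWIMI
// (bit-field insert) when the combination is equivalent to inserting the
// constant bits into x under a run-of-ones mask.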
  4146. bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
  4147. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4148. unsigned Imm;
  4149. if (!isInt32Immediate(N->getOperand(1), Imm))
  4150. return false;
  4151. SDValue Val = N->getOperand(0);
  4152. unsigned Imm2;
  4153. // ISD::OR doesn't get all the bitfield insertion fun.
  4154. // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
  4155. // bitfield insert.
  4156. if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
  4157. return false;
  4158. // The idea here is to check whether this is equivalent to:
  4159. // (c1 & m) | (x & ~m)
  4160. // where m is a run-of-ones mask. The logic here is that, for each bit in
  4161. // c1 and c2:
  4162. // - if both are 1, then the output will be 1.
  4163. // - if both are 0, then the output will be 0.
  4164. // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
  4165. // come from x.
  4166. // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
  4167. // be 0.
  4168. // If that last condition is never the case, then we can form m from the
  4169. // bits that are the same between c1 and c2.
  4170. unsigned MB, ME;
  4171. if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
  4172. SDLoc dl(N);
  4173. SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
  4174. getI32Imm(MB, dl), getI32Imm(ME, dl)};
  4175. ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
  4176. return true;
  4177. }
  4178. return false;
  4179. }
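// tryAsSingleRLDICL - Try to select this 64-bit AND with a zero-extension
// mask (a low-order run of ones) as a single RLDICL, folding in a preceding
// logical right shift where possible.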
  4180. bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
  4181. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4182. uint64_t Imm64;
  4183. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
  4184. return false;
  4185. // If this is a 64-bit zero-extension mask, emit rldicl.
  4186. unsigned MB = 64 - countTrailingOnes(Imm64);
  4187. unsigned SH = 0;
  4188. unsigned Imm;
  4189. SDValue Val = N->getOperand(0);
  4190. SDLoc dl(N);
  4191. if (Val.getOpcode() == ISD::ANY_EXTEND) {
  4192. auto Op0 = Val.getOperand(0);
  4193. if (Op0.getOpcode() == ISD::SRL &&
  4194. isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
  4195. auto ResultType = Val.getNode()->getValueType(0);
  4196. auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
  4197. SDValue IDVal(ImDef, 0);
  4198. Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
  4199. IDVal, Op0.getOperand(0),
  4200. getI32Imm(1, dl)),
  4201. 0);
  4202. SH = 64 - Imm;
  4203. }
  4204. }
  4205. // If the operand is a logical right shift, we can fold it into this
  4206. // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
  4207. // for n <= mb. The right shift is really a left rotate followed by a
  4208. // mask, and this mask is a more-restrictive sub-mask of the mask implied
  4209. // by the shift.
  4210. if (Val.getOpcode() == ISD::SRL &&
  4211. isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
  4212. assert(Imm < 64 && "Illegal shift amount");
  4213. Val = Val.getOperand(0);
  4214. SH = 64 - Imm;
  4215. }
  4216. SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
  4217. CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  4218. return true;
  4219. }
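// tryAsSingleRLDICR - Try to select this 64-bit AND as a single RLDICR when
// the mask is a run of ones starting at the most significant bit (i.e. its
// complement is a low-order run of ones).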
  4220. bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
  4221. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4222. uint64_t Imm64;
  4223. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
  4224. !isMask_64(~Imm64))
  4225. return false;
  4226. // If this is a negated 64-bit zero-extension mask,
4227. // i.e. the immediate is a sequence of ones from the most significant side
4228. // and all zeros for the remainder, we should use rldicr.
  4229. unsigned MB = 63 - countTrailingOnes(~Imm64);
  4230. unsigned SH = 0;
  4231. SDLoc dl(N);
  4232. SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
  4233. CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
  4234. return true;
  4235. }
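// tryAsSingleRLDIMI - Try to select (or x, imm) as a single RLDIMI that
// inserts all-ones bits into x under the mask, when imm is a 64-bit run of
// ones that ori/oris alone would not handle.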
  4236. bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
  4237. assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
  4238. uint64_t Imm64;
  4239. unsigned MB, ME;
  4240. SDValue N0 = N->getOperand(0);
4241. // We won't get fewer instructions if the imm is a 32-bit integer.
  4242. // rldimi requires the imm to have consecutive ones with both sides zero.
  4243. // Also, make sure the first Op has only one use, otherwise this may increase
  4244. // register pressure since rldimi is destructive.
  4245. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
  4246. isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
  4247. return false;
  4248. unsigned SH = 63 - ME;
  4249. SDLoc Dl(N);
4250. // Use selectI64Imm to materialize the immediate with an LI instruction instead of putting Imm64 into the node directly.
  4251. SDValue Ops[] = {
  4252. N->getOperand(0),
  4253. SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
  4254. getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
  4255. CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
  4256. return true;
  4257. }
  4258. // Select - Convert the specified operand from a target-independent to a
  4259. // target-specific node if it hasn't already been changed.
  4260. void PPCDAGToDAGISel::Select(SDNode *N) {
  4261. SDLoc dl(N);
  4262. if (N->isMachineOpcode()) {
  4263. N->setNodeId(-1);
  4264. return; // Already selected.
  4265. }
  4266. // In case any misguided DAG-level optimizations form an ADD with a
  4267. // TargetConstant operand, crash here instead of miscompiling (by selecting
  4268. // an r+r add instead of some kind of r+i add).
  4269. if (N->getOpcode() == ISD::ADD &&
  4270. N->getOperand(1).getOpcode() == ISD::TargetConstant)
  4271. llvm_unreachable("Invalid ADD with TargetConstant operand");
  4272. // Try matching complex bit permutations before doing anything else.
  4273. if (tryBitPermutation(N))
  4274. return;
  4275. // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
  4276. if (tryIntCompareInGPR(N))
  4277. return;
  4278. switch (N->getOpcode()) {
  4279. default: break;
  4280. case ISD::Constant:
  4281. if (N->getValueType(0) == MVT::i64) {
  4282. ReplaceNode(N, selectI64Imm(CurDAG, N));
  4283. return;
  4284. }
  4285. break;
  4286. case ISD::INTRINSIC_WO_CHAIN: {
  4287. if (!Subtarget->isISA3_1())
  4288. break;
  4289. unsigned Opcode = 0;
  4290. switch (N->getConstantOperandVal(0)) {
  4291. default:
  4292. break;
  4293. case Intrinsic::ppc_altivec_vstribr_p:
  4294. Opcode = PPC::VSTRIBR_rec;
  4295. break;
  4296. case Intrinsic::ppc_altivec_vstribl_p:
  4297. Opcode = PPC::VSTRIBL_rec;
  4298. break;
  4299. case Intrinsic::ppc_altivec_vstrihr_p:
  4300. Opcode = PPC::VSTRIHR_rec;
  4301. break;
  4302. case Intrinsic::ppc_altivec_vstrihl_p:
  4303. Opcode = PPC::VSTRIHL_rec;
  4304. break;
  4305. }
  4306. if (!Opcode)
  4307. break;
  4308. // Generate the appropriate vector string isolate intrinsic to match.
  4309. EVT VTs[] = {MVT::v16i8, MVT::Glue};
  4310. SDValue VecStrOp =
  4311. SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
  4312. // Vector string isolate instructions update the EQ bit of CR6.
  4313. // Generate a SETBC instruction to extract the bit and place it in a GPR.
  4314. SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
  4315. SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
  4316. SDValue CRBit = SDValue(
  4317. CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
  4318. CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
  4319. 0);
  4320. CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
  4321. return;
  4322. }
  4323. case ISD::SETCC:
  4324. case ISD::STRICT_FSETCC:
  4325. case ISD::STRICT_FSETCCS:
  4326. if (trySETCC(N))
  4327. return;
  4328. break;
  4329. // These nodes will be transformed into GETtlsADDR32 node, which
  4330. // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
  4331. case PPCISD::ADDI_TLSLD_L_ADDR:
  4332. case PPCISD::ADDI_TLSGD_L_ADDR: {
  4333. const Module *Mod = MF->getFunction().getParent();
  4334. if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
  4335. !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
  4336. Mod->getPICLevel() == PICLevel::SmallPIC)
  4337. break;
  4338. // Attach global base pointer on GETtlsADDR32 node in order to
  4339. // generate secure plt code for TLS symbols.
  4340. getGlobalBaseReg();
  4341. } break;
  4342. case PPCISD::CALL: {
  4343. if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
  4344. !TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
  4345. !Subtarget->isTargetELF())
  4346. break;
  4347. SDValue Op = N->getOperand(1);
  4348. if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
  4349. if (GA->getTargetFlags() == PPCII::MO_PLT)
  4350. getGlobalBaseReg();
  4351. }
  4352. else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
  4353. if (ES->getTargetFlags() == PPCII::MO_PLT)
  4354. getGlobalBaseReg();
  4355. }
  4356. }
  4357. break;
  4358. case PPCISD::GlobalBaseReg:
  4359. ReplaceNode(N, getGlobalBaseReg());
  4360. return;
  4361. case ISD::FrameIndex:
  4362. selectFrameIndex(N, N);
  4363. return;
  4364. case PPCISD::MFOCRF: {
  4365. SDValue InFlag = N->getOperand(1);
  4366. ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
  4367. N->getOperand(0), InFlag));
  4368. return;
  4369. }
  4370. case PPCISD::READ_TIME_BASE:
  4371. ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
  4372. MVT::Other, N->getOperand(0)));
  4373. return;
  4374. case PPCISD::SRA_ADDZE: {
  4375. SDValue N0 = N->getOperand(0);
  4376. SDValue ShiftAmt =
  4377. CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
  4378. getConstantIntValue(), dl,
  4379. N->getValueType(0));
  4380. if (N->getValueType(0) == MVT::i64) {
  4381. SDNode *Op =
  4382. CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
  4383. N0, ShiftAmt);
  4384. CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
  4385. SDValue(Op, 1));
  4386. return;
  4387. } else {
  4388. assert(N->getValueType(0) == MVT::i32 &&
  4389. "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
  4390. SDNode *Op =
  4391. CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
  4392. N0, ShiftAmt);
  4393. CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
  4394. SDValue(Op, 1));
  4395. return;
  4396. }
  4397. }
  4398. case ISD::STORE: {
  4399. // Change TLS initial-exec D-form stores to X-form stores.
  4400. StoreSDNode *ST = cast<StoreSDNode>(N);
  4401. if (EnableTLSOpt && Subtarget->isELFv2ABI() &&
  4402. ST->getAddressingMode() != ISD::PRE_INC)
  4403. if (tryTLSXFormStore(ST))
  4404. return;
  4405. break;
  4406. }
  4407. case ISD::LOAD: {
  4408. // Handle preincrement loads.
  4409. LoadSDNode *LD = cast<LoadSDNode>(N);
  4410. EVT LoadedVT = LD->getMemoryVT();
  4411. // Normal loads are handled by code generated from the .td file.
  4412. if (LD->getAddressingMode() != ISD::PRE_INC) {
  4413. // Change TLS initial-exec D-form loads to X-form loads.
  4414. if (EnableTLSOpt && Subtarget->isELFv2ABI())
  4415. if (tryTLSXFormLoad(LD))
  4416. return;
  4417. break;
  4418. }
  4419. SDValue Offset = LD->getOffset();
  4420. if (Offset.getOpcode() == ISD::TargetConstant ||
  4421. Offset.getOpcode() == ISD::TargetGlobalAddress) {
  4422. unsigned Opcode;
  4423. bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
  4424. if (LD->getValueType(0) != MVT::i64) {
  4425. // Handle PPC32 integer and normal FP loads.
  4426. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
  4427. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4428. default: llvm_unreachable("Invalid PPC load type!");
  4429. case MVT::f64: Opcode = PPC::LFDU; break;
  4430. case MVT::f32: Opcode = PPC::LFSU; break;
  4431. case MVT::i32: Opcode = PPC::LWZU; break;
  4432. case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
  4433. case MVT::i1:
  4434. case MVT::i8: Opcode = PPC::LBZU; break;
  4435. }
  4436. } else {
  4437. assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
  4438. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
  4439. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4440. default: llvm_unreachable("Invalid PPC load type!");
  4441. case MVT::i64: Opcode = PPC::LDU; break;
  4442. case MVT::i32: Opcode = PPC::LWZU8; break;
  4443. case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
  4444. case MVT::i1:
  4445. case MVT::i8: Opcode = PPC::LBZU8; break;
  4446. }
  4447. }
  4448. SDValue Chain = LD->getChain();
  4449. SDValue Base = LD->getBasePtr();
  4450. SDValue Ops[] = { Offset, Base, Chain };
  4451. SDNode *MN = CurDAG->getMachineNode(
  4452. Opcode, dl, LD->getValueType(0),
  4453. PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
  4454. transferMemOperands(N, MN);
  4455. ReplaceNode(N, MN);
  4456. return;
  4457. } else {
  4458. unsigned Opcode;
  4459. bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
  4460. if (LD->getValueType(0) != MVT::i64) {
  4461. // Handle PPC32 integer and normal FP loads.
  4462. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
  4463. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4464. default: llvm_unreachable("Invalid PPC load type!");
  4465. case MVT::f64: Opcode = PPC::LFDUX; break;
  4466. case MVT::f32: Opcode = PPC::LFSUX; break;
  4467. case MVT::i32: Opcode = PPC::LWZUX; break;
  4468. case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
  4469. case MVT::i1:
  4470. case MVT::i8: Opcode = PPC::LBZUX; break;
  4471. }
  4472. } else {
  4473. assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
  4474. assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
  4475. "Invalid sext update load");
  4476. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4477. default: llvm_unreachable("Invalid PPC load type!");
  4478. case MVT::i64: Opcode = PPC::LDUX; break;
  4479. case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
  4480. case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
  4481. case MVT::i1:
  4482. case MVT::i8: Opcode = PPC::LBZUX8; break;
  4483. }
  4484. }
  4485. SDValue Chain = LD->getChain();
  4486. SDValue Base = LD->getBasePtr();
  4487. SDValue Ops[] = { Base, Offset, Chain };
  4488. SDNode *MN = CurDAG->getMachineNode(
  4489. Opcode, dl, LD->getValueType(0),
  4490. PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
  4491. transferMemOperands(N, MN);
  4492. ReplaceNode(N, MN);
  4493. return;
  4494. }
  4495. }
  4496. case ISD::AND:
  4497. // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
  4498. if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
  4499. tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
  4500. return;
  4501. // Other cases are autogenerated.
  4502. break;
  4503. case ISD::OR: {
  4504. if (N->getValueType(0) == MVT::i32)
  4505. if (tryBitfieldInsert(N))
  4506. return;
  4507. int16_t Imm;
  4508. if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
  4509. isIntS16Immediate(N->getOperand(1), Imm)) {
  4510. KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
  4511. // If this is equivalent to an add, then we can fold it with the
  4512. // FrameIndex calculation.
  4513. if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
  4514. selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
  4515. return;
  4516. }
  4517. }
  4518. // If this is 'or' against an imm with consecutive ones and both sides zero,
  4519. // try to emit rldimi
  4520. if (tryAsSingleRLDIMI(N))
  4521. return;
  4522. // OR with a 32-bit immediate can be handled by ori + oris
  4523. // without creating an immediate in a GPR.
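// For illustration, with a hypothetical immediate of 0x12345678 this emits
// roughly:
//   ori  tmp, x,   0x5678    // low 16 bits
//   oris dst, tmp, 0x1234    // high 16 bits of the 32-bit immediate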
  4524. uint64_t Imm64 = 0;
  4525. bool IsPPC64 = Subtarget->isPPC64();
  4526. if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
  4527. (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4528. // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
  4529. uint64_t ImmHi = Imm64 >> 16;
  4530. uint64_t ImmLo = Imm64 & 0xFFFF;
  4531. if (ImmHi != 0 && ImmLo != 0) {
  4532. SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
  4533. N->getOperand(0),
  4534. getI16Imm(ImmLo, dl));
  4535. SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
  4536. CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
  4537. return;
  4538. }
  4539. }
  4540. // Other cases are autogenerated.
  4541. break;
  4542. }
  4543. case ISD::XOR: {
  4544. // XOR with a 32-bit immediate can be handled by xori + xoris
  4545. // without creating an immediate in a GPR.
  4546. uint64_t Imm64 = 0;
  4547. bool IsPPC64 = Subtarget->isPPC64();
  4548. if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
  4549. (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4550. // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
  4551. uint64_t ImmHi = Imm64 >> 16;
  4552. uint64_t ImmLo = Imm64 & 0xFFFF;
  4553. if (ImmHi != 0 && ImmLo != 0) {
  4554. SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
  4555. N->getOperand(0),
  4556. getI16Imm(ImmLo, dl));
  4557. SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
  4558. CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
  4559. return;
  4560. }
  4561. }
  4562. break;
  4563. }
  4564. case ISD::ADD: {
  4565. int16_t Imm;
  4566. if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
  4567. isIntS16Immediate(N->getOperand(1), Imm)) {
  4568. selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
  4569. return;
  4570. }
  4571. break;
  4572. }
  4573. case ISD::SHL: {
  4574. unsigned Imm, SH, MB, ME;
  4575. if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
  4576. isRotateAndMask(N, Imm, true, SH, MB, ME)) {
  4577. SDValue Ops[] = { N->getOperand(0).getOperand(0),
  4578. getI32Imm(SH, dl), getI32Imm(MB, dl),
  4579. getI32Imm(ME, dl) };
  4580. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4581. return;
  4582. }
  4583. // Other cases are autogenerated.
  4584. break;
  4585. }
  4586. case ISD::SRL: {
  4587. unsigned Imm, SH, MB, ME;
  4588. if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
  4589. isRotateAndMask(N, Imm, true, SH, MB, ME)) {
  4590. SDValue Ops[] = { N->getOperand(0).getOperand(0),
  4591. getI32Imm(SH, dl), getI32Imm(MB, dl),
  4592. getI32Imm(ME, dl) };
  4593. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4594. return;
  4595. }
  4596. // Other cases are autogenerated.
  4597. break;
  4598. }
  4599. case ISD::MUL: {
  4600. SDValue Op1 = N->getOperand(1);
  4601. if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64)
  4602. break;
  4603. // If the multiplier fits int16, we can handle it with mulli.
  4604. int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
  4605. unsigned Shift = countTrailingZeros<uint64_t>(Imm);
  4606. if (isInt<16>(Imm) || !Shift)
  4607. break;
  4608. // If the shifted value fits int16, we can do this transformation:
4609. // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISel because
4610. // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
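// For example, with a hypothetical multiplier of 40: 40 = 5 << 3, so we emit
// (mulli tmp, X, 5) followed by (rldicr dst, tmp, 3, 60), which shifts the
// product left by 3 (the mask clears the three low bits the rotate wraps in).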
  4611. uint64_t ImmSh = Imm >> Shift;
  4612. if (isInt<16>(ImmSh)) {
  4613. uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
  4614. SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
  4615. SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
  4616. N->getOperand(0), SDImm);
  4617. CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0),
  4618. getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl));
  4619. return;
  4620. }
  4621. break;
  4622. }
  4623. // FIXME: Remove this once the ANDI glue bug is fixed:
  4624. case PPCISD::ANDI_rec_1_EQ_BIT:
  4625. case PPCISD::ANDI_rec_1_GT_BIT: {
  4626. if (!ANDIGlueBug)
  4627. break;
  4628. EVT InVT = N->getOperand(0).getValueType();
  4629. assert((InVT == MVT::i64 || InVT == MVT::i32) &&
  4630. "Invalid input type for ANDI_rec_1_EQ_BIT");
  4631. unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
  4632. SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
  4633. N->getOperand(0),
  4634. CurDAG->getTargetConstant(1, dl, InVT)),
  4635. 0);
  4636. SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  4637. SDValue SRIdxVal = CurDAG->getTargetConstant(
  4638. N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
  4639. dl, MVT::i32);
  4640. CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
  4641. SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
  4642. return;
  4643. }
  4644. case ISD::SELECT_CC: {
  4645. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
  4646. EVT PtrVT =
  4647. CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  4648. bool isPPC64 = (PtrVT == MVT::i64);
  4649. // If this is a select of i1 operands, we'll pattern match it.
  4650. if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
  4651. break;
  4652. if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
  4653. bool NeedSwapOps = false;
  4654. bool IsUnCmp = false;
  4655. if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
  4656. SDValue LHS = N->getOperand(0);
  4657. SDValue RHS = N->getOperand(1);
  4658. if (NeedSwapOps)
  4659. std::swap(LHS, RHS);
4660. // Use SelectCC to generate the comparison that sets the CR bits. For
4661. // equality comparisons with one literal operand, SelectCC may avoid
4662. // materializing the whole literal and instead check it with an xoris
4663. // first; in that case the resulting CR bits cannot exactly represent a
4664. // GT/LT relationship. To avoid this we specify SETGT/SETUGT here instead
4665. // of SETEQ.
  4666. SDValue GenCC =
  4667. SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
  4668. CurDAG->SelectNodeTo(
  4669. N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
  4670. N->getValueType(0), GenCC);
  4671. NumP9Setb++;
  4672. return;
  4673. }
  4674. }
  4675. // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
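// Roughly, the sequence built below computes (lhs != 0) without a branch:
//   addic tmp, lhs, -1    // CA = 1 iff lhs != 0
//   subfe dst, tmp, lhs   // dst = lhs - tmp - 1 + CA = CA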
  4676. if (!isPPC64)
  4677. if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
  4678. if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
  4679. if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
  4680. if (N1C->isNullValue() && N3C->isNullValue() &&
  4681. N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
  4682. // FIXME: Implement this optzn for PPC64.
  4683. N->getValueType(0) == MVT::i32) {
  4684. SDNode *Tmp =
  4685. CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  4686. N->getOperand(0), getI32Imm(~0U, dl));
  4687. CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
  4688. N->getOperand(0), SDValue(Tmp, 1));
  4689. return;
  4690. }
  4691. SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
  4692. if (N->getValueType(0) == MVT::i1) {
  4693. // An i1 select is: (c & t) | (!c & f).
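// This is expanded below with CR-logical ops, roughly:
//   crnor notc, c, c      // !c
//   crand a, c, t
//   crand b, notc, f
//   cror  res, a, b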
  4694. bool Inv;
  4695. unsigned Idx = getCRIdxForSetCC(CC, Inv);
  4696. unsigned SRI;
  4697. switch (Idx) {
  4698. default: llvm_unreachable("Invalid CC index");
  4699. case 0: SRI = PPC::sub_lt; break;
  4700. case 1: SRI = PPC::sub_gt; break;
  4701. case 2: SRI = PPC::sub_eq; break;
  4702. case 3: SRI = PPC::sub_un; break;
  4703. }
  4704. SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
  4705. SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
  4706. CCBit, CCBit), 0);
  4707. SDValue C = Inv ? NotCCBit : CCBit,
  4708. NotC = Inv ? CCBit : NotCCBit;
  4709. SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
  4710. C, N->getOperand(2)), 0);
  4711. SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
  4712. NotC, N->getOperand(3)), 0);
  4713. CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
  4714. return;
  4715. }
  4716. unsigned BROpc =
  4717. getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
  4718. unsigned SelectCCOp;
  4719. if (N->getValueType(0) == MVT::i32)
  4720. SelectCCOp = PPC::SELECT_CC_I4;
  4721. else if (N->getValueType(0) == MVT::i64)
  4722. SelectCCOp = PPC::SELECT_CC_I8;
  4723. else if (N->getValueType(0) == MVT::f32) {
  4724. if (Subtarget->hasP8Vector())
  4725. SelectCCOp = PPC::SELECT_CC_VSSRC;
  4726. else if (Subtarget->hasSPE())
  4727. SelectCCOp = PPC::SELECT_CC_SPE4;
  4728. else
  4729. SelectCCOp = PPC::SELECT_CC_F4;
  4730. } else if (N->getValueType(0) == MVT::f64) {
  4731. if (Subtarget->hasVSX())
  4732. SelectCCOp = PPC::SELECT_CC_VSFRC;
  4733. else if (Subtarget->hasSPE())
  4734. SelectCCOp = PPC::SELECT_CC_SPE;
  4735. else
  4736. SelectCCOp = PPC::SELECT_CC_F8;
  4737. } else if (N->getValueType(0) == MVT::f128)
  4738. SelectCCOp = PPC::SELECT_CC_F16;
  4739. else if (Subtarget->hasSPE())
  4740. SelectCCOp = PPC::SELECT_CC_SPE;
  4741. else if (N->getValueType(0) == MVT::v2f64 ||
  4742. N->getValueType(0) == MVT::v2i64)
  4743. SelectCCOp = PPC::SELECT_CC_VSRC;
  4744. else
  4745. SelectCCOp = PPC::SELECT_CC_VRRC;
  4746. SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
  4747. getI32Imm(BROpc, dl) };
  4748. CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
  4749. return;
  4750. }
  4751. case ISD::VECTOR_SHUFFLE:
  4752. if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
  4753. N->getValueType(0) == MVT::v2i64)) {
  4754. ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  4755. SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
  4756. Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
  4757. unsigned DM[2];
  4758. for (int i = 0; i < 2; ++i)
  4759. if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
  4760. DM[i] = 0;
  4761. else
  4762. DM[i] = 1;
  4763. if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
  4764. Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
  4765. isa<LoadSDNode>(Op1.getOperand(0))) {
  4766. LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
  4767. SDValue Base, Offset;
  4768. if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
  4769. (LD->getMemoryVT() == MVT::f64 ||
  4770. LD->getMemoryVT() == MVT::i64) &&
  4771. SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
  4772. SDValue Chain = LD->getChain();
  4773. SDValue Ops[] = { Base, Offset, Chain };
  4774. MachineMemOperand *MemOp = LD->getMemOperand();
  4775. SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
  4776. N->getValueType(0), Ops);
  4777. CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
  4778. return;
  4779. }
  4780. }
  4781. // For little endian, we must swap the input operands and adjust
  4782. // the mask elements (reverse and invert them).
  4783. if (Subtarget->isLittleEndian()) {
  4784. std::swap(Op1, Op2);
  4785. unsigned tmp = DM[0];
  4786. DM[0] = 1 - DM[1];
  4787. DM[1] = 1 - tmp;
  4788. }
  4789. SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
  4790. MVT::i32);
  4791. SDValue Ops[] = { Op1, Op2, DMV };
  4792. CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
  4793. return;
  4794. }
  4795. break;
  4796. case PPCISD::BDNZ:
  4797. case PPCISD::BDZ: {
  4798. bool IsPPC64 = Subtarget->isPPC64();
  4799. SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
  4800. CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
  4801. ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
  4802. : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
  4803. MVT::Other, Ops);
  4804. return;
  4805. }
  4806. case PPCISD::COND_BRANCH: {
  4807. // Op #0 is the Chain.
  4808. // Op #1 is the PPC::PRED_* number.
  4809. // Op #2 is the CR#
  4810. // Op #3 is the Dest MBB
  4811. // Op #4 is the Flag.
  4812. // Prevent PPC::PRED_* from being selected into LI.
  4813. unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  4814. if (EnableBranchHint)
  4815. PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
  4816. SDValue Pred = getI32Imm(PCC, dl);
  4817. SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
  4818. N->getOperand(0), N->getOperand(4) };
  4819. CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
  4820. return;
  4821. }
  4822. case ISD::BR_CC: {
  4823. if (tryFoldSWTestBRCC(N))
  4824. return;
  4825. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  4826. unsigned PCC =
  4827. getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
  4828. if (N->getOperand(2).getValueType() == MVT::i1) {
  4829. unsigned Opc;
  4830. bool Swap;
  4831. switch (PCC) {
  4832. default: llvm_unreachable("Unexpected Boolean-operand predicate");
  4833. case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
  4834. case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
  4835. case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
  4836. case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
  4837. case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
  4838. case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
  4839. }
  4840. // A signed comparison of i1 values produces the opposite result to an
  4841. // unsigned one if the condition code includes less-than or greater-than.
  4842. // This is because 1 is the most negative signed i1 number and the most
  4843. // positive unsigned i1 number. The CR-logical operations used for such
  4844. // comparisons are non-commutative so for signed comparisons vs. unsigned
  4845. // ones, the input operands just need to be swapped.
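// (Recall that the i1 value 1 is -1 when interpreted as signed, so for
// example signed "1 < 0" is true while unsigned "1 < 0" is false.)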
  4846. if (ISD::isSignedIntSetCC(CC))
  4847. Swap = !Swap;
  4848. SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
  4849. N->getOperand(Swap ? 3 : 2),
  4850. N->getOperand(Swap ? 2 : 3)), 0);
  4851. CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
  4852. N->getOperand(0));
  4853. return;
  4854. }
  4855. if (EnableBranchHint)
  4856. PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
  4857. SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
  4858. SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
  4859. N->getOperand(4), N->getOperand(0) };
  4860. CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
  4861. return;
  4862. }
  4863. case ISD::BRIND: {
  4864. // FIXME: Should custom lower this.
  4865. SDValue Chain = N->getOperand(0);
  4866. SDValue Target = N->getOperand(1);
  4867. unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
  4868. unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
  4869. Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
  4870. Chain), 0);
  4871. CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
  4872. return;
  4873. }
  4874. case PPCISD::TOC_ENTRY: {
  4875. const bool isPPC64 = Subtarget->isPPC64();
  4876. const bool isELFABI = Subtarget->isSVR4ABI();
  4877. const bool isAIXABI = Subtarget->isAIXABI();
4878. // PowerPC only supports the small, medium, and large code models.
  4879. const CodeModel::Model CModel = TM.getCodeModel();
  4880. assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
  4881. "PowerPC doesn't support tiny or kernel code models.");
  4882. if (isAIXABI && CModel == CodeModel::Medium)
  4883. report_fatal_error("Medium code model is not supported on AIX.");
  4884. // For 64-bit small code model, we allow SelectCodeCommon to handle this,
  4885. // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
  4886. if (isPPC64 && CModel == CodeModel::Small)
  4887. break;
  4888. // Handle 32-bit small code model.
  4889. if (!isPPC64) {
4890. // Transforms the ISD::TOC_ENTRY node into a PPC::LWZtoc machine node.
  4891. auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) {
  4892. SDValue GA = TocEntry->getOperand(0);
  4893. SDValue TocBase = TocEntry->getOperand(1);
  4894. SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
  4895. TocBase);
  4896. transferMemOperands(TocEntry, MN);
  4897. ReplaceNode(TocEntry, MN);
  4898. };
  4899. if (isELFABI) {
  4900. assert(TM.isPositionIndependent() &&
  4901. "32-bit ELF can only have TOC entries in position independent"
  4902. " code.");
  4903. // 32-bit ELF always uses a small code model toc access.
  4904. replaceWithLWZtoc(N);
  4905. return;
  4906. }
  4907. if (isAIXABI && CModel == CodeModel::Small) {
  4908. replaceWithLWZtoc(N);
  4909. return;
  4910. }
  4911. }
  4912. assert(CModel != CodeModel::Small && "All small code models handled.");
  4913. assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
  4914. " ELF/AIX or 32-bit AIX in the following.");
  4915. // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
  4916. // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
  4917. // generate two instructions as described below. The first source operand
  4918. // is a symbol reference. If it must be toc-referenced according to
  4919. // Subtarget, we generate:
  4920. // [32-bit AIX]
  4921. // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
  4922. // [64-bit ELF/AIX]
  4923. // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
  4924. // Otherwise we generate:
  4925. // ADDItocL(ADDIStocHA8(%x2, @sym), @sym)
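// As assembly, the got-indirect 64-bit case is roughly (symbol name
// illustrative):
//   addis r3, r2, sym@toc@ha
//   ld    r3, sym@toc@l(r3)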
  4926. SDValue GA = N->getOperand(0);
  4927. SDValue TOCbase = N->getOperand(1);
  4928. EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  4929. SDNode *Tmp = CurDAG->getMachineNode(
  4930. isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
  4931. if (PPCLowering->isAccessedAsGotIndirect(GA)) {
  4932. // If it is accessed as got-indirect, we need an extra LWZ/LD to load
  4933. // the address.
  4934. SDNode *MN = CurDAG->getMachineNode(
  4935. isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
  4936. transferMemOperands(N, MN);
  4937. ReplaceNode(N, MN);
  4938. return;
  4939. }
  4940. // Build the address relative to the TOC-pointer.
  4941. ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
  4942. SDValue(Tmp, 0), GA));
  4943. return;
  4944. }
  4945. case PPCISD::PPC32_PICGOT:
  4946. // Generate a PIC-safe GOT reference.
  4947. assert(Subtarget->is32BitELFABI() &&
  4948. "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
  4949. CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
  4950. PPCLowering->getPointerTy(CurDAG->getDataLayout()),
  4951. MVT::i32);
  4952. return;
  4953. case PPCISD::VADD_SPLAT: {
  4954. // This expands into one of three sequences, depending on whether
  4955. // the first operand is odd or even, positive or negative.
  4956. assert(isa<ConstantSDNode>(N->getOperand(0)) &&
  4957. isa<ConstantSDNode>(N->getOperand(1)) &&
  4958. "Invalid operand on VADD_SPLAT!");
  4959. int Elt = N->getConstantOperandVal(0);
  4960. int EltSize = N->getConstantOperandVal(1);
  4961. unsigned Opc1, Opc2, Opc3;
  4962. EVT VT;
  4963. if (EltSize == 1) {
  4964. Opc1 = PPC::VSPLTISB;
  4965. Opc2 = PPC::VADDUBM;
  4966. Opc3 = PPC::VSUBUBM;
  4967. VT = MVT::v16i8;
  4968. } else if (EltSize == 2) {
  4969. Opc1 = PPC::VSPLTISH;
  4970. Opc2 = PPC::VADDUHM;
  4971. Opc3 = PPC::VSUBUHM;
  4972. VT = MVT::v8i16;
  4973. } else {
  4974. assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
  4975. Opc1 = PPC::VSPLTISW;
  4976. Opc2 = PPC::VADDUWM;
  4977. Opc3 = PPC::VSUBUWM;
  4978. VT = MVT::v4i32;
  4979. }
  4980. if ((Elt & 1) == 0) {
  4981. // Elt is even, in the range [-32,-18] + [16,30].
  4982. //
  4983. // Convert: VADD_SPLAT elt, size
  4984. // Into: tmp = VSPLTIS[BHW] elt
  4985. // VADDU[BHW]M tmp, tmp
  4986. // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
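// For example, with hypothetical operands elt = 20, size = 4:
//   vspltisw tmp, 10
//   vadduwm  dst, tmp, tmp   // each word element becomes 20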
  4987. SDValue EltVal = getI32Imm(Elt >> 1, dl);
  4988. SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  4989. SDValue TmpVal = SDValue(Tmp, 0);
  4990. ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
  4991. return;
  4992. } else if (Elt > 0) {
  4993. // Elt is odd and positive, in the range [17,31].
  4994. //
  4995. // Convert: VADD_SPLAT elt, size
  4996. // Into: tmp1 = VSPLTIS[BHW] elt-16
  4997. // tmp2 = VSPLTIS[BHW] -16
  4998. // VSUBU[BHW]M tmp1, tmp2
  4999. SDValue EltVal = getI32Imm(Elt - 16, dl);
  5000. SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5001. EltVal = getI32Imm(-16, dl);
  5002. SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5003. ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
  5004. SDValue(Tmp2, 0)));
  5005. return;
  5006. } else {
  5007. // Elt is odd and negative, in the range [-31,-17].
  5008. //
  5009. // Convert: VADD_SPLAT elt, size
  5010. // Into: tmp1 = VSPLTIS[BHW] elt+16
  5011. // tmp2 = VSPLTIS[BHW] -16
  5012. // VADDU[BHW]M tmp1, tmp2
  5013. SDValue EltVal = getI32Imm(Elt + 16, dl);
  5014. SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5015. EltVal = getI32Imm(-16, dl);
  5016. SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5017. ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
  5018. SDValue(Tmp2, 0)));
  5019. return;
  5020. }
  5021. }
  5022. }
  5023. SelectCode(N);
  5024. }
  5025. // If the target supports the cmpb instruction, do the idiom recognition here.
  5026. // We don't do this as a DAG combine because we don't want to do it as nodes
  5027. // are being combined (because we might miss part of the eventual idiom). We
  5028. // don't want to do it during instruction selection because we want to reuse
5029. // the logic for lowering the masking operations that is already part of the
5030. // instruction selector.
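// The cmpb instruction compares its two source registers byte by byte and
// sets each byte of the result to 0xFF where the bytes are equal and 0x00
// where they differ, so an OR of per-byte equality selects (as matched by
// IsByteSelectCC below) can be collapsed into a single CMPB plus masking.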
  5031. SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
  5032. SDLoc dl(N);
  5033. assert(N->getOpcode() == ISD::OR &&
  5034. "Only OR nodes are supported for CMPB");
  5035. SDValue Res;
  5036. if (!Subtarget->hasCMPB())
  5037. return Res;
  5038. if (N->getValueType(0) != MVT::i32 &&
  5039. N->getValueType(0) != MVT::i64)
  5040. return Res;
  5041. EVT VT = N->getValueType(0);
  5042. SDValue RHS, LHS;
  5043. bool BytesFound[8] = {false, false, false, false, false, false, false, false};
  5044. uint64_t Mask = 0, Alt = 0;
  5045. auto IsByteSelectCC = [this](SDValue O, unsigned &b,
  5046. uint64_t &Mask, uint64_t &Alt,
  5047. SDValue &LHS, SDValue &RHS) {
  5048. if (O.getOpcode() != ISD::SELECT_CC)
  5049. return false;
  5050. ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
  5051. if (!isa<ConstantSDNode>(O.getOperand(2)) ||
  5052. !isa<ConstantSDNode>(O.getOperand(3)))
  5053. return false;
  5054. uint64_t PM = O.getConstantOperandVal(2);
  5055. uint64_t PAlt = O.getConstantOperandVal(3);
  5056. for (b = 0; b < 8; ++b) {
  5057. uint64_t Mask = UINT64_C(0xFF) << (8*b);
  5058. if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
  5059. break;
  5060. }
  5061. if (b == 8)
  5062. return false;
  5063. Mask |= PM;
  5064. Alt |= PAlt;
  5065. if (!isa<ConstantSDNode>(O.getOperand(1)) ||
  5066. O.getConstantOperandVal(1) != 0) {
  5067. SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
  5068. if (Op0.getOpcode() == ISD::TRUNCATE)
  5069. Op0 = Op0.getOperand(0);
  5070. if (Op1.getOpcode() == ISD::TRUNCATE)
  5071. Op1 = Op1.getOperand(0);
  5072. if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
  5073. Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
  5074. isa<ConstantSDNode>(Op0.getOperand(1))) {
  5075. unsigned Bits = Op0.getValueSizeInBits();
  5076. if (b != Bits/8-1)
  5077. return false;
  5078. if (Op0.getConstantOperandVal(1) != Bits-8)
  5079. return false;
  5080. LHS = Op0.getOperand(0);
  5081. RHS = Op1.getOperand(0);
  5082. return true;
  5083. }
  5084. // When we have small integers (i16 to be specific), the form present
  5085. // post-legalization uses SETULT in the SELECT_CC for the
  5086. // higher-order byte, depending on the fact that the
  5087. // even-higher-order bytes are known to all be zero, for example:
  5088. // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
  5089. // (so when the second byte is the same, because all higher-order
  5090. // bits from bytes 3 and 4 are known to be zero, the result of the
  5091. // xor can be at most 255)
  5092. if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
  5093. isa<ConstantSDNode>(O.getOperand(1))) {
  5094. uint64_t ULim = O.getConstantOperandVal(1);
  5095. if (ULim != (UINT64_C(1) << b*8))
  5096. return false;
  5097. // Now we need to make sure that the upper bytes are known to be
  5098. // zero.
  5099. unsigned Bits = Op0.getValueSizeInBits();
  5100. if (!CurDAG->MaskedValueIsZero(
  5101. Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
  5102. return false;
  5103. LHS = Op0.getOperand(0);
  5104. RHS = Op0.getOperand(1);
  5105. return true;
  5106. }
  5107. return false;
  5108. }
  5109. if (CC != ISD::SETEQ)
  5110. return false;
  5111. SDValue Op = O.getOperand(0);
  5112. if (Op.getOpcode() == ISD::AND) {
  5113. if (!isa<ConstantSDNode>(Op.getOperand(1)))
  5114. return false;
  5115. if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
  5116. return false;
  5117. SDValue XOR = Op.getOperand(0);
  5118. if (XOR.getOpcode() == ISD::TRUNCATE)
  5119. XOR = XOR.getOperand(0);
  5120. if (XOR.getOpcode() != ISD::XOR)
  5121. return false;
  5122. LHS = XOR.getOperand(0);
  5123. RHS = XOR.getOperand(1);
  5124. return true;
  5125. } else if (Op.getOpcode() == ISD::SRL) {
  5126. if (!isa<ConstantSDNode>(Op.getOperand(1)))
  5127. return false;
  5128. unsigned Bits = Op.getValueSizeInBits();
  5129. if (b != Bits/8-1)
  5130. return false;
  5131. if (Op.getConstantOperandVal(1) != Bits-8)
  5132. return false;
  5133. SDValue XOR = Op.getOperand(0);
  5134. if (XOR.getOpcode() == ISD::TRUNCATE)
  5135. XOR = XOR.getOperand(0);
  5136. if (XOR.getOpcode() != ISD::XOR)
  5137. return false;
  5138. LHS = XOR.getOperand(0);
  5139. RHS = XOR.getOperand(1);
  5140. return true;
  5141. }
  5142. return false;
  5143. };
  5144. SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
  5145. while (!Queue.empty()) {
  5146. SDValue V = Queue.pop_back_val();
  5147. for (const SDValue &O : V.getNode()->ops()) {
  5148. unsigned b = 0;
  5149. uint64_t M = 0, A = 0;
  5150. SDValue OLHS, ORHS;
  5151. if (O.getOpcode() == ISD::OR) {
  5152. Queue.push_back(O);
  5153. } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
  5154. if (!LHS) {
  5155. LHS = OLHS;
  5156. RHS = ORHS;
  5157. BytesFound[b] = true;
  5158. Mask |= M;
  5159. Alt |= A;
  5160. } else if ((LHS == ORHS && RHS == OLHS) ||
  5161. (RHS == ORHS && LHS == OLHS)) {
  5162. BytesFound[b] = true;
  5163. Mask |= M;
  5164. Alt |= A;
  5165. } else {
  5166. return Res;
  5167. }
  5168. } else {
  5169. return Res;
  5170. }
  5171. }
  5172. }
  5173. unsigned LastB = 0, BCnt = 0;
  5174. for (unsigned i = 0; i < 8; ++i)
5175. if (BytesFound[i]) {
  5176. ++BCnt;
  5177. LastB = i;
  5178. }
  5179. if (!LastB || BCnt < 2)
  5180. return Res;
5181. // Because we'll be zero-extending the output anyway if we don't have a
5182. // specific value for each input byte (via the Mask), we can 'anyext' the inputs.
  5183. if (LHS.getValueType() != VT) {
  5184. LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
  5185. RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
  5186. }
  5187. Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
  5188. bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
  5189. if (NonTrivialMask && !Alt) {
  5190. // Res = Mask & CMPB
  5191. Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
  5192. CurDAG->getConstant(Mask, dl, VT));
  5193. } else if (Alt) {
  5194. // Res = (CMPB & Mask) | (~CMPB & Alt)
  5195. // Which, as suggested here:
  5196. // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  5197. // can be written as:
  5198. // Res = Alt ^ ((Alt ^ Mask) & CMPB)
  5199. // useful because the (Alt ^ Mask) can be pre-computed.
  5200. Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
  5201. CurDAG->getConstant(Mask ^ Alt, dl, VT));
  5202. Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
  5203. CurDAG->getConstant(Alt, dl, VT));
  5204. }
  5205. return Res;
  5206. }
5207. // When CR bit registers are enabled, an extension of an i1 variable to an i32
  5208. // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
  5209. // involves constant materialization of a 0 or a 1 or both. If the result of
  5210. // the extension is then operated upon by some operator that can be constant
  5211. // folded with a constant 0 or 1, and that constant can be materialized using
  5212. // only one instruction (like a zero or one), then we should fold in those
  5213. // operations with the select.
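// For example (illustrative), (add (zext i1 %c), 42) can be folded into
// (select %c, 43, 42), so neither 0 nor 1 needs to be materialized first.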
  5214. void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
  5215. if (!Subtarget->useCRBits())
  5216. return;
  5217. if (N->getOpcode() != ISD::ZERO_EXTEND &&
  5218. N->getOpcode() != ISD::SIGN_EXTEND &&
  5219. N->getOpcode() != ISD::ANY_EXTEND)
  5220. return;
  5221. if (N->getOperand(0).getValueType() != MVT::i1)
  5222. return;
  5223. if (!N->hasOneUse())
  5224. return;
  5225. SDLoc dl(N);
  5226. EVT VT = N->getValueType(0);
  5227. SDValue Cond = N->getOperand(0);
  5228. SDValue ConstTrue =
  5229. CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
  5230. SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
  5231. do {
  5232. SDNode *User = *N->use_begin();
  5233. if (User->getNumOperands() != 2)
  5234. break;
  5235. auto TryFold = [this, N, User, dl](SDValue Val) {
  5236. SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
  5237. SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
  5238. SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
  5239. return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
  5240. User->getValueType(0), {O0, O1});
  5241. };
  5242. // FIXME: When the semantics of the interaction between select and undef
  5243. // are clearly defined, it may turn out to be unnecessary to break here.
  5244. SDValue TrueRes = TryFold(ConstTrue);
  5245. if (!TrueRes || TrueRes.isUndef())
  5246. break;
  5247. SDValue FalseRes = TryFold(ConstFalse);
  5248. if (!FalseRes || FalseRes.isUndef())
  5249. break;
  5250. // For us to materialize these using one instruction, we must be able to
  5251. // represent them as signed 16-bit integers.
  5252. uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
  5253. False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
  5254. if (!isInt<16>(True) || !isInt<16>(False))
  5255. break;
  5256. // We can replace User with a new SELECT node, and try again to see if we
  5257. // can fold the select with its user.
  5258. Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
  5259. N = User;
  5260. ConstTrue = TrueRes;
  5261. ConstFalse = FalseRes;
  5262. } while (N->hasOneUse());
  5263. }
  5264. void PPCDAGToDAGISel::PreprocessISelDAG() {
  5265. SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
  5266. bool MadeChange = false;
  5267. while (Position != CurDAG->allnodes_begin()) {
  5268. SDNode *N = &*--Position;
  5269. if (N->use_empty())
  5270. continue;
  5271. SDValue Res;
  5272. switch (N->getOpcode()) {
  5273. default: break;
  5274. case ISD::OR:
  5275. Res = combineToCMPB(N);
  5276. break;
  5277. }
  5278. if (!Res)
  5279. foldBoolExts(Res, N);
  5280. if (Res) {
  5281. LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
  5282. LLVM_DEBUG(N->dump(CurDAG));
  5283. LLVM_DEBUG(dbgs() << "\nNew: ");
  5284. LLVM_DEBUG(Res.getNode()->dump(CurDAG));
  5285. LLVM_DEBUG(dbgs() << "\n");
  5286. CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
  5287. MadeChange = true;
  5288. }
  5289. }
  5290. if (MadeChange)
  5291. CurDAG->RemoveDeadNodes();
  5292. }
  5293. /// PostprocessISelDAG - Perform some late peephole optimizations
  5294. /// on the DAG representation.
  5295. void PPCDAGToDAGISel::PostprocessISelDAG() {
  5296. // Skip peepholes at -O0.
  5297. if (TM.getOptLevel() == CodeGenOpt::None)
  5298. return;
  5299. PeepholePPC64();
  5300. PeepholeCROps();
  5301. PeepholePPC64ZExt();
  5302. }
  5303. // Check if all users of this node will become isel where the second operand
  5304. // is the constant zero. If this is so, and if we can negate the condition,
  5305. // then we can flip the true and false operands. This will allow the zero to
  5306. // be folded with the isel so that we don't need to materialize a register
  5307. // containing zero.
  5308. bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
  5309. for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
  5310. UI != UE; ++UI) {
  5311. SDNode *User = *UI;
  5312. if (!User->isMachineOpcode())
  5313. return false;
  5314. if (User->getMachineOpcode() != PPC::SELECT_I4 &&
  5315. User->getMachineOpcode() != PPC::SELECT_I8)
  5316. return false;
  5317. SDNode *Op1 = User->getOperand(1).getNode();
  5318. SDNode *Op2 = User->getOperand(2).getNode();
  5319. // If we have a degenerate select with two equal operands, swapping will
  5320. // not do anything, and we may run into an infinite loop.
  5321. if (Op1 == Op2)
  5322. return false;
  5323. if (!Op2->isMachineOpcode())
  5324. return false;
  5325. if (Op2->getMachineOpcode() != PPC::LI &&
  5326. Op2->getMachineOpcode() != PPC::LI8)
  5327. return false;
  5328. ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
  5329. if (!C)
  5330. return false;
  5331. if (!C->isNullValue())
  5332. return false;
  5333. }
  5334. return true;
  5335. }
  5336. void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
  5337. SmallVector<SDNode *, 4> ToReplace;
  5338. for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
  5339. UI != UE; ++UI) {
  5340. SDNode *User = *UI;
  5341. assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
  5342. User->getMachineOpcode() == PPC::SELECT_I8) &&
  5343. "Must have all select users");
  5344. ToReplace.push_back(User);
  5345. }
  5346. for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
  5347. UE = ToReplace.end(); UI != UE; ++UI) {
  5348. SDNode *User = *UI;
  5349. SDNode *ResNode =
  5350. CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
  5351. User->getValueType(0), User->getOperand(0),
  5352. User->getOperand(2),
  5353. User->getOperand(1));
  5354. LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
  5355. LLVM_DEBUG(User->dump(CurDAG));
  5356. LLVM_DEBUG(dbgs() << "\nNew: ");
  5357. LLVM_DEBUG(ResNode->dump(CurDAG));
  5358. LLVM_DEBUG(dbgs() << "\n");
  5359. ReplaceUses(User, ResNode);
  5360. }
  5361. }
  5362. void PPCDAGToDAGISel::PeepholeCROps() {
  5363. bool IsModified;
  5364. do {
  5365. IsModified = false;
  5366. for (SDNode &Node : CurDAG->allnodes()) {
  5367. MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
  5368. if (!MachineNode || MachineNode->use_empty())
  5369. continue;
  5370. SDNode *ResNode = MachineNode;
  5371. bool Op1Set = false, Op1Unset = false,
  5372. Op1Not = false,
  5373. Op2Set = false, Op2Unset = false,
  5374. Op2Not = false;
  5375. unsigned Opcode = MachineNode->getMachineOpcode();
  5376. switch (Opcode) {
  5377. default: break;
  5378. case PPC::CRAND:
  5379. case PPC::CRNAND:
  5380. case PPC::CROR:
  5381. case PPC::CRXOR:
  5382. case PPC::CRNOR:
  5383. case PPC::CREQV:
  5384. case PPC::CRANDC:
  5385. case PPC::CRORC: {
  5386. SDValue Op = MachineNode->getOperand(1);
  5387. if (Op.isMachineOpcode()) {
  5388. if (Op.getMachineOpcode() == PPC::CRSET)
  5389. Op2Set = true;
  5390. else if (Op.getMachineOpcode() == PPC::CRUNSET)
  5391. Op2Unset = true;
  5392. else if (Op.getMachineOpcode() == PPC::CRNOR &&
  5393. Op.getOperand(0) == Op.getOperand(1))
  5394. Op2Not = true;
  5395. }
  5396. LLVM_FALLTHROUGH;
  5397. }
  5398. case PPC::BC:
  5399. case PPC::BCn:
  5400. case PPC::SELECT_I4:
  5401. case PPC::SELECT_I8:
  5402. case PPC::SELECT_F4:
  5403. case PPC::SELECT_F8:
  5404. case PPC::SELECT_SPE:
  5405. case PPC::SELECT_SPE4:
  5406. case PPC::SELECT_VRRC:
  5407. case PPC::SELECT_VSFRC:
  5408. case PPC::SELECT_VSSRC:
  5409. case PPC::SELECT_VSRC: {
  5410. SDValue Op = MachineNode->getOperand(0);
  5411. if (Op.isMachineOpcode()) {
  5412. if (Op.getMachineOpcode() == PPC::CRSET)
  5413. Op1Set = true;
  5414. else if (Op.getMachineOpcode() == PPC::CRUNSET)
  5415. Op1Unset = true;
  5416. else if (Op.getMachineOpcode() == PPC::CRNOR &&
  5417. Op.getOperand(0) == Op.getOperand(1))
  5418. Op1Not = true;
  5419. }
  5420. }
  5421. break;
  5422. }
  5423. bool SelectSwap = false;
  5424. switch (Opcode) {
  5425. default: break;
  5426. case PPC::CRAND:
  5427. if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
  5428. // x & x = x
  5429. ResNode = MachineNode->getOperand(0).getNode();
  5430. else if (Op1Set)
  5431. // 1 & y = y
  5432. ResNode = MachineNode->getOperand(1).getNode();
  5433. else if (Op2Set)
  5434. // x & 1 = x
  5435. ResNode = MachineNode->getOperand(0).getNode();
  5436. else if (Op1Unset || Op2Unset)
  5437. // x & 0 = 0 & y = 0
  5438. ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
  5439. MVT::i1);
  5440. else if (Op1Not)
  5441. // ~x & y = andc(y, x)
  5442. ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
  5443. MVT::i1, MachineNode->getOperand(1),
  5444. MachineNode->getOperand(0).
  5445. getOperand(0));
  5446. else if (Op2Not)
  5447. // x & ~y = andc(x, y)
  5448. ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
  5449. MVT::i1, MachineNode->getOperand(0),
  5450. MachineNode->getOperand(1).
  5451. getOperand(0));
  5452. else if (AllUsersSelectZero(MachineNode)) {
  5453. ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
  5454. MVT::i1, MachineNode->getOperand(0),
  5455. MachineNode->getOperand(1));
  5456. SelectSwap = true;
  5457. }
  5458. break;
  5459. case PPC::CRNAND:
  5460. if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
  5461. // nand(x, x) -> nor(x, x)
  5462. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5463. MVT::i1, MachineNode->getOperand(0),
  5464. MachineNode->getOperand(0));
  5465. else if (Op1Set)
  5466. // nand(1, y) -> nor(y, y)
  5467. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5468. MVT::i1, MachineNode->getOperand(1),
  5469. MachineNode->getOperand(1));
  5470. else if (Op2Set)
  5471. // nand(x, 1) -> nor(x, x)
  5472. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5473. MVT::i1, MachineNode->getOperand(0),
  5474. MachineNode->getOperand(0));
  5475. else if (Op1Unset || Op2Unset)
  5476. // nand(x, 0) = nand(0, y) = 1
  5477. ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
  5478. MVT::i1);
  5479. else if (Op1Not)
  5480. // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
  5481. ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
  5482. MVT::i1, MachineNode->getOperand(0).
  5483. getOperand(0),
  5484. MachineNode->getOperand(1));
  5485. else if (Op2Not)
  5486. // nand(x, ~y) = ~x | y = orc(y, x)
  5487. ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
  5488. MVT::i1, MachineNode->getOperand(1).
  5489. getOperand(0),
  5490. MachineNode->getOperand(0));
  5491. else if (AllUsersSelectZero(MachineNode)) {
  5492. ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
  5493. MVT::i1, MachineNode->getOperand(0),
  5494. MachineNode->getOperand(1));
  5495. SelectSwap = true;
  5496. }
  5497. break;
  5498. case PPC::CROR:
  5499. if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
  5500. // x | x = x
  5501. ResNode = MachineNode->getOperand(0).getNode();
  5502. else if (Op1Set || Op2Set)
  5503. // x | 1 = 1 | y = 1
  5504. ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
  5505. MVT::i1);
  5506. else if (Op1Unset)
  5507. // 0 | y = y
  5508. ResNode = MachineNode->getOperand(1).getNode();
  5509. else if (Op2Unset)
  5510. // x | 0 = x
  5511. ResNode = MachineNode->getOperand(0).getNode();
  5512. else if (Op1Not)
  5513. // ~x | y = orc(y, x)
  5514. ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
  5515. MVT::i1, MachineNode->getOperand(1),
  5516. MachineNode->getOperand(0).
  5517. getOperand(0));
  5518. else if (Op2Not)
  5519. // x | ~y = orc(x, y)
  5520. ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
  5521. MVT::i1, MachineNode->getOperand(0),
  5522. MachineNode->getOperand(1).
  5523. getOperand(0));
  5524. else if (AllUsersSelectZero(MachineNode)) {
  5525. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5526. MVT::i1, MachineNode->getOperand(0),
  5527. MachineNode->getOperand(1));
  5528. SelectSwap = true;
  5529. }
  5530. break;
  5531. case PPC::CRXOR:
  5532. if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
  5533. // xor(x, x) = 0
  5534. ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
  5535. MVT::i1);
  5536. else if (Op1Set)
  5537. // xor(1, y) -> nor(y, y)
  5538. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5539. MVT::i1, MachineNode->getOperand(1),
  5540. MachineNode->getOperand(1));
  5541. else if (Op2Set)
  5542. // xor(x, 1) -> nor(x, x)
  5543. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5544. MVT::i1, MachineNode->getOperand(0),
  5545. MachineNode->getOperand(0));
  5546. else if (Op1Unset)
  5547. // xor(0, y) = y
  5548. ResNode = MachineNode->getOperand(1).getNode();
  5549. else if (Op2Unset)
  5550. // xor(x, 0) = x
  5551. ResNode = MachineNode->getOperand(0).getNode();
  5552. else if (Op1Not)
  5553. // xor(~x, y) = eqv(x, y)
  5554. ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
  5555. MVT::i1, MachineNode->getOperand(0).
  5556. getOperand(0),
  5557. MachineNode->getOperand(1));
  5558. else if (Op2Not)
  5559. // xor(x, ~y) = eqv(x, y)
  5560. ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
  5561. MVT::i1, MachineNode->getOperand(0),
  5562. MachineNode->getOperand(1).
  5563. getOperand(0));
  5564. else if (AllUsersSelectZero(MachineNode)) {
  5565. ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
  5566. MVT::i1, MachineNode->getOperand(0),
  5567. MachineNode->getOperand(1));
  5568. SelectSwap = true;
  5569. }
  5570. break;
  5571. case PPC::CRNOR:
  5572. if (Op1Set || Op2Set)
  5573. // nor(1, y) -> 0
  5574. ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
  5575. MVT::i1);
  5576. else if (Op1Unset)
  5577. // nor(0, y) = ~y -> nor(y, y)
  5578. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5579. MVT::i1, MachineNode->getOperand(1),
  5580. MachineNode->getOperand(1));
  5581. else if (Op2Unset)
  5582. // nor(x, 0) = ~x
  5583. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5584. MVT::i1, MachineNode->getOperand(0),
  5585. MachineNode->getOperand(0));
  5586. else if (Op1Not)
  5587. // nor(~x, y) = andc(x, y)
  5588. ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
  5589. MVT::i1, MachineNode->getOperand(0).
  5590. getOperand(0),
  5591. MachineNode->getOperand(1));
  5592. else if (Op2Not)
  5593. // nor(x, ~y) = andc(y, x)
  5594. ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
  5595. MVT::i1, MachineNode->getOperand(1).
  5596. getOperand(0),
  5597. MachineNode->getOperand(0));
  5598. else if (AllUsersSelectZero(MachineNode)) {
  5599. ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
  5600. MVT::i1, MachineNode->getOperand(0),
  5601. MachineNode->getOperand(1));
  5602. SelectSwap = true;
  5603. }
  5604. break;
  5605. case PPC::CREQV:
  5606. if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
  5607. // eqv(x, x) = 1
  5608. ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
  5609. MVT::i1);
  5610. else if (Op1Set)
  5611. // eqv(1, y) = y
  5612. ResNode = MachineNode->getOperand(1).getNode();
  5613. else if (Op2Set)
  5614. // eqv(x, 1) = x
  5615. ResNode = MachineNode->getOperand(0).getNode();
  5616. else if (Op1Unset)
  5617. // eqv(0, y) = ~y -> nor(y, y)
  5618. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5619. MVT::i1, MachineNode->getOperand(1),
  5620. MachineNode->getOperand(1));
  5621. else if (Op2Unset)
  5622. // eqv(x, 0) = ~x
  5623. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5624. MVT::i1, MachineNode->getOperand(0),
  5625. MachineNode->getOperand(0));
  5626. else if (Op1Not)
  5627. // eqv(~x, y) = xor(x, y)
  5628. ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
  5629. MVT::i1, MachineNode->getOperand(0).
  5630. getOperand(0),
  5631. MachineNode->getOperand(1));
  5632. else if (Op2Not)
  5633. // eqv(x, ~y) = xor(x, y)
  5634. ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
  5635. MVT::i1, MachineNode->getOperand(0),
  5636. MachineNode->getOperand(1).
  5637. getOperand(0));
  5638. else if (AllUsersSelectZero(MachineNode)) {
  5639. ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
  5640. MVT::i1, MachineNode->getOperand(0),
  5641. MachineNode->getOperand(1));
  5642. SelectSwap = true;
  5643. }
  5644. break;
  5645. case PPC::CRANDC:
  5646. if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
  5647. // andc(x, x) = 0
  5648. ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
  5649. MVT::i1);
  5650. else if (Op1Set)
  5651. // andc(1, y) = ~y
  5652. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5653. MVT::i1, MachineNode->getOperand(1),
  5654. MachineNode->getOperand(1));
  5655. else if (Op1Unset || Op2Set)
  5656. // andc(0, y) = andc(x, 1) = 0
  5657. ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
  5658. MVT::i1);
  5659. else if (Op2Unset)
  5660. // andc(x, 0) = x
  5661. ResNode = MachineNode->getOperand(0).getNode();
  5662. else if (Op1Not)
  5663. // andc(~x, y) = ~(x | y) = nor(x, y)
  5664. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5665. MVT::i1, MachineNode->getOperand(0).
  5666. getOperand(0),
  5667. MachineNode->getOperand(1));
  5668. else if (Op2Not)
  5669. // andc(x, ~y) = x & y
  5670. ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
  5671. MVT::i1, MachineNode->getOperand(0),
  5672. MachineNode->getOperand(1).
  5673. getOperand(0));
  5674. else if (AllUsersSelectZero(MachineNode)) {
  5675. ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
  5676. MVT::i1, MachineNode->getOperand(1),
  5677. MachineNode->getOperand(0));
  5678. SelectSwap = true;
  5679. }
  5680. break;
  5681. case PPC::CRORC:
  5682. if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
  5683. // orc(x, x) = 1
  5684. ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
  5685. MVT::i1);
  5686. else if (Op1Set || Op2Unset)
  5687. // orc(1, y) = orc(x, 0) = 1
  5688. ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
  5689. MVT::i1);
  5690. else if (Op2Set)
  5691. // orc(x, 1) = x
  5692. ResNode = MachineNode->getOperand(0).getNode();
  5693. else if (Op1Unset)
  5694. // orc(0, y) = ~y
  5695. ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
  5696. MVT::i1, MachineNode->getOperand(1),
  5697. MachineNode->getOperand(1));
  5698. else if (Op1Not)
  5699. // orc(~x, y) = ~(x & y) = nand(x, y)
  5700. ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
  5701. MVT::i1, MachineNode->getOperand(0).
  5702. getOperand(0),
  5703. MachineNode->getOperand(1));
  5704. else if (Op2Not)
  5705. // orc(x, ~y) = x | y
  5706. ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
  5707. MVT::i1, MachineNode->getOperand(0),
  5708. MachineNode->getOperand(1).
  5709. getOperand(0));
  5710. else if (AllUsersSelectZero(MachineNode)) {
  5711. ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
  5712. MVT::i1, MachineNode->getOperand(1),
  5713. MachineNode->getOperand(0));
  5714. SelectSwap = true;
  5715. }
  5716. break;
  5717. case PPC::SELECT_I4:
  5718. case PPC::SELECT_I8:
  5719. case PPC::SELECT_F4:
  5720. case PPC::SELECT_F8:
  5721. case PPC::SELECT_SPE:
  5722. case PPC::SELECT_SPE4:
  5723. case PPC::SELECT_VRRC:
  5724. case PPC::SELECT_VSFRC:
  5725. case PPC::SELECT_VSSRC:
  5726. case PPC::SELECT_VSRC:
  5727. if (Op1Set)
  5728. ResNode = MachineNode->getOperand(1).getNode();
  5729. else if (Op1Unset)
  5730. ResNode = MachineNode->getOperand(2).getNode();
  5731. else if (Op1Not)
  5732. ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
  5733. SDLoc(MachineNode),
  5734. MachineNode->getValueType(0),
  5735. MachineNode->getOperand(0).
  5736. getOperand(0),
  5737. MachineNode->getOperand(2),
  5738. MachineNode->getOperand(1));
  5739. break;
  5740. case PPC::BC:
  5741. case PPC::BCn:
  5742. if (Op1Not)
  5743. ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
  5744. PPC::BC,
  5745. SDLoc(MachineNode),
  5746. MVT::Other,
  5747. MachineNode->getOperand(0).
  5748. getOperand(0),
  5749. MachineNode->getOperand(1),
  5750. MachineNode->getOperand(2));
  5751. // FIXME: Handle Op1Set, Op1Unset here too.
  5752. break;
  5753. }
  5754. // If we're inverting this node because it is used only by selects that
  5755. // we'd like to swap, then swap the selects before the node replacement.
  5756. if (SelectSwap)
  5757. SwapAllSelectUsers(MachineNode);
  5758. if (ResNode != MachineNode) {
  5759. LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
  5760. LLVM_DEBUG(MachineNode->dump(CurDAG));
  5761. LLVM_DEBUG(dbgs() << "\nNew: ");
  5762. LLVM_DEBUG(ResNode->dump(CurDAG));
  5763. LLVM_DEBUG(dbgs() << "\n");
  5764. ReplaceUses(MachineNode, ResNode);
  5765. IsModified = true;
  5766. }
  5767. }
  5768. if (IsModified)
  5769. CurDAG->RemoveDeadNodes();
  5770. } while (IsModified);
  5771. }
  5772. // Gather the set of 32-bit operations that are known to have their
  5773. // higher-order 32 bits zero, where ToPromote contains all such operations.
  5774. static bool PeepholePPC64ZExtGather(SDValue Op32,
  5775. SmallPtrSetImpl<SDNode *> &ToPromote) {
  5776. if (!Op32.isMachineOpcode())
  5777. return false;
  5778. // First, check for the "frontier" instructions (those that will clear the
5779. // higher-order 32 bits).
  5780. // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
  5781. // around. If it does not, then these instructions will clear the
  5782. // higher-order bits.
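// For example, MB=8, ME=27 (MB <= ME) is a non-wrapping mask that lies
// entirely within the low 32 bits, so the higher-order 32 bits of the result
// are guaranteed zero.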
  5783. if ((Op32.getMachineOpcode() == PPC::RLWINM ||
  5784. Op32.getMachineOpcode() == PPC::RLWNM) &&
  5785. Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
  5786. ToPromote.insert(Op32.getNode());
  5787. return true;
  5788. }
  5789. // SLW and SRW always clear the higher-order bits.
  5790. if (Op32.getMachineOpcode() == PPC::SLW ||
  5791. Op32.getMachineOpcode() == PPC::SRW) {
  5792. ToPromote.insert(Op32.getNode());
  5793. return true;
  5794. }
  5795. // For LI and LIS, we need the immediate to be positive (so that it is not
  5796. // sign extended).
  5797. if (Op32.getMachineOpcode() == PPC::LI ||
  5798. Op32.getMachineOpcode() == PPC::LIS) {
  5799. if (!isUInt<15>(Op32.getConstantOperandVal(0)))
  5800. return false;
  5801. ToPromote.insert(Op32.getNode());
  5802. return true;
  5803. }
  5804. // LHBRX and LWBRX always clear the higher-order bits.
  5805. if (Op32.getMachineOpcode() == PPC::LHBRX ||
  5806. Op32.getMachineOpcode() == PPC::LWBRX) {
  5807. ToPromote.insert(Op32.getNode());
  5808. return true;
  5809. }
5810. // CNTLZW and CNTTZW always produce a value in [0,32], and so the result is zero extended.
  5811. if (Op32.getMachineOpcode() == PPC::CNTLZW ||
  5812. Op32.getMachineOpcode() == PPC::CNTTZW) {
  5813. ToPromote.insert(Op32.getNode());
  5814. return true;
  5815. }
  5816. // Next, check for those instructions we can look through.
5817. // If the mask does not wrap around, the higher-order bits are taken
5818. // directly from the first operand.
  5819. if (Op32.getMachineOpcode() == PPC::RLWIMI &&
  5820. Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
  5821. SmallPtrSet<SDNode *, 16> ToPromote1;
  5822. if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
  5823. return false;
  5824. ToPromote.insert(Op32.getNode());
  5825. ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
  5826. return true;
  5827. }
  5828. // For OR, the higher-order bits are zero if that is true for both operands.
  5829. // For SELECT_I4, the same is true (but the relevant operand numbers are
  5830. // shifted by 1).
  if (Op32.getMachineOpcode() == PPC::OR ||
      Op32.getMachineOpcode() == PPC::SELECT_I4) {
    unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;

    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
      return false;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // For ORI and ORIS, we need the higher-order bits of the first operand to be
  // zero, and also for the constant to be positive (so that it is not sign
  // extended).
  if (Op32.getMachineOpcode() == PPC::ORI ||
      Op32.getMachineOpcode() == PPC::ORIS) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;
    if (!isUInt<15>(Op32.getConstantOperandVal(1)))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // The higher-order bits of AND are zero if that is true for at least one of
  // the operands.
  if (Op32.getMachineOpcode() == PPC::AND) {
    SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;

    bool Op0OK =
        PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK =
        PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    if (Op1OK)
      ToPromote.insert(ToPromote2.begin(), ToPromote2.end());

    return true;
  }

  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
  // of the first operand, or if the second operand is positive (so that it is
  // not sign extended).
  if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
      Op32.getMachineOpcode() == PPC::ANDIS_rec) {
    SmallPtrSet<SDNode *, 16> ToPromote1;

    bool Op0OK =
        PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    return true;
  }

  return false;
}

void PPCDAGToDAGISel::PeepholePPC64ZExt() {
  if (!Subtarget->isPPC64())
    return;

  // When we zero-extend from i32 to i64, we use a pattern like this:
  // def : Pat<(i64 (zext i32:$in)),
  //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
  //                   0, 32)>;
  // There are several 32-bit shift/rotate instructions, however, that will
  // clear the higher-order bits of their output, rendering the RLDICL
  // unnecessary. When that happens, we remove it here, and redefine the
  // relevant 32-bit operation to be a 64-bit operation.
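  //
  // As a rough sketch of the rewrite (operand lists abbreviated), a chain
  // such as:
  //   t1: i32 = RLWINM t0, 2, 0, 29
  //   t2: i64 = INSERT_SUBREG IMPLICIT_DEF, t1, sub_32
  //   t3: i64 = RLDICL t2, 0, 32
  // can be reduced to a single promoted node:
  //   t3: i64 = RLWINM8 ..., 2, 0, 29
  // because RLWINM with a non-wrapping mask already leaves the high-order 32
  // bits of the 64-bit register clear.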
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (N->getMachineOpcode() != PPC::RLDICL)
      continue;

    if (N->getConstantOperandVal(1) != 0 ||
        N->getConstantOperandVal(2) != 32)
      continue;

    SDValue ISR = N->getOperand(0);
    if (!ISR.isMachineOpcode() ||
        ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
      continue;

    if (!ISR.hasOneUse())
      continue;

    if (ISR.getConstantOperandVal(2) != PPC::sub_32)
      continue;

    SDValue IDef = ISR.getOperand(0);
    if (!IDef.isMachineOpcode() ||
        IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
      continue;

    // We now know that we're looking at a canonical i32 -> i64 zext. See if we
    // can get rid of it.

    SDValue Op32 = ISR->getOperand(1);
    if (!Op32.isMachineOpcode())
      continue;

    // There are some 32-bit instructions that always clear the high-order 32
    // bits, and there are also some instructions (like AND) that we can look
    // through.
    SmallPtrSet<SDNode *, 16> ToPromote;
    if (!PeepholePPC64ZExtGather(Op32, ToPromote))
      continue;

    // If the ToPromote set contains nodes that have uses outside of the set
    // (except for the original INSERT_SUBREG), then abort the transformation.
    bool OutsideUse = false;
    for (SDNode *PN : ToPromote) {
      for (SDNode *UN : PN->uses()) {
        if (!ToPromote.count(UN) && UN != ISR.getNode()) {
          OutsideUse = true;
          break;
        }
      }

      if (OutsideUse)
        break;
    }

    if (OutsideUse)
      continue;

    MadeChange = true;

    // We now know that this zero extension can be removed by promoting the
    // nodes in ToPromote to 64-bit operations, where for operations in the
    // frontier of the set, we need to insert INSERT_SUBREGs for their
    // operands.
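    // (For example, if a node in the set has an i32 operand produced by a
    // node outside the set, that operand gets wrapped in its own
    // INSERT_SUBREG below so the promoted 64-bit instruction sees an i64
    // value.)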
    for (SDNode *PN : ToPromote) {
      unsigned NewOpcode;
      switch (PN->getMachineOpcode()) {
      default:
        llvm_unreachable("Don't know the 64-bit variant of this instruction");
      case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
      case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
      case PPC::SLW:       NewOpcode = PPC::SLW8; break;
      case PPC::SRW:       NewOpcode = PPC::SRW8; break;
      case PPC::LI:        NewOpcode = PPC::LI8; break;
      case PPC::LIS:       NewOpcode = PPC::LIS8; break;
      case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
      case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
      case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
      case PPC::CNTTZW:    NewOpcode = PPC::CNTTZW8; break;
      case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
      case PPC::OR:        NewOpcode = PPC::OR8; break;
      case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
      case PPC::ORI:       NewOpcode = PPC::ORI8; break;
      case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
      case PPC::AND:       NewOpcode = PPC::AND8; break;
      case PPC::ANDI_rec:
        NewOpcode = PPC::ANDI8_rec;
        break;
      case PPC::ANDIS_rec:
        NewOpcode = PPC::ANDIS8_rec;
        break;
      }

      // Note: During the replacement process, the nodes will be in an
      // inconsistent state (some instructions will have operands with values
      // of the wrong type). Once done, however, everything should be right
      // again.

      SmallVector<SDValue, 4> Ops;
      for (const SDValue &V : PN->ops()) {
        if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
            !isa<ConstantSDNode>(V)) {
          SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
          SDNode *ReplOp =
              CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
                                     ISR.getNode()->getVTList(), ReplOpOps);
          Ops.push_back(SDValue(ReplOp, 0));
        } else {
          Ops.push_back(V);
        }
      }

      // Because all to-be-promoted nodes only have users that are other
      // promoted nodes (or the original INSERT_SUBREG), we can safely replace
      // the i32 result value type with i64.

      SmallVector<EVT, 2> NewVTs;
      SDVTList VTs = PN->getVTList();
      for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
        if (VTs.VTs[i] == MVT::i32)
          NewVTs.push_back(MVT::i64);
        else
          NewVTs.push_back(VTs.VTs[i]);

      LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
      LLVM_DEBUG(PN->dump(CurDAG));

      CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);

      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(PN->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");
    }

    // Now we replace the original zero extend and its associated INSERT_SUBREG
    // with the value feeding the INSERT_SUBREG (which has now been promoted to
    // return an i64).
    LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(N, Op32.getNode());
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

void PPCDAGToDAGISel::PeepholePPC64() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    unsigned FirstOp;
    unsigned StorageOpcode = N->getMachineOpcode();
    bool RequiresMod4Offset = false;
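    // (DS-form memory instructions such as LD, STD and LWA encode their
    // displacement with the low two bits implied to be zero, so any
    // displacement folded into them must be a multiple of 4; that is what
    // RequiresMod4Offset tracks.)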
    switch (StorageOpcode) {
    default: continue;

    case PPC::LWA:
    case PPC::LD:
    case PPC::DFLOADf64:
    case PPC::DFLOADf32:
      RequiresMod4Offset = true;
      LLVM_FALLTHROUGH;
    case PPC::LBZ:
    case PPC::LBZ8:
    case PPC::LFD:
    case PPC::LFS:
    case PPC::LHA:
    case PPC::LHA8:
    case PPC::LHZ:
    case PPC::LHZ8:
    case PPC::LWZ:
    case PPC::LWZ8:
      FirstOp = 0;
      break;

    case PPC::STD:
    case PPC::DFSTOREf64:
    case PPC::DFSTOREf32:
      RequiresMod4Offset = true;
      LLVM_FALLTHROUGH;
    case PPC::STB:
    case PPC::STB8:
    case PPC::STFD:
    case PPC::STFS:
    case PPC::STH:
    case PPC::STH8:
    case PPC::STW:
    case PPC::STW8:
      FirstOp = 1;
      break;
    }

    // If this is a load or store with a zero offset, or within the alignment,
    // we may be able to fold an add-immediate into the memory operation.
    // The check against alignment is below, as it can't occur until we check
    // the arguments to N.
    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
      continue;

    SDValue Base = N->getOperand(FirstOp + 1);
    if (!Base.isMachineOpcode())
      continue;

    unsigned Flags = 0;
    bool ReplaceFlags = true;

    // When the feeding operation is an add-immediate of some sort,
    // determine whether we need to add relocation information to the
    // target flags on the immediate operand when we fold it into the
    // load instruction.
    //
    // For something like ADDItocL, the relocation information is
    // inferred from the opcode; when we process it in the AsmPrinter,
    // we add the necessary relocation there. A load, though, can receive
    // relocation from various flavors of ADDIxxx, so we need to carry
    // the relocation information in the target flags.
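    //
    // As a rough example of the rewrite this enables (register numbers
    // arbitrary):
    //   addi 4, 3, sym@toc@l
    //   ld   5, 0(4)
    // can be folded into the single memory access:
    //   ld   5, sym@toc@l(3)
    // with the @toc@l relocation now carried on the load's immediate operand.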
    switch (Base.getMachineOpcode()) {
    default: continue;

    case PPC::ADDI8:
    case PPC::ADDI:
      // In some cases (such as TLS) the relocation information
      // is already in place on the operand, so copying the operand
      // is sufficient.
      ReplaceFlags = false;
      // For these cases, the immediate may not be divisible by 4, in
      // which case the fold is illegal for DS-form instructions. (The
      // other cases provide aligned addresses and are always safe.)
      if (RequiresMod4Offset &&
          (!isa<ConstantSDNode>(Base.getOperand(1)) ||
           Base.getConstantOperandVal(1) % 4 != 0))
        continue;
      break;
    case PPC::ADDIdtprelL:
      Flags = PPCII::MO_DTPREL_LO;
      break;
    case PPC::ADDItlsldL:
      Flags = PPCII::MO_TLSLD_LO;
      break;
    case PPC::ADDItocL:
      Flags = PPCII::MO_TOC_LO;
      break;
    }

    SDValue ImmOpnd = Base.getOperand(1);

    // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
    // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
    // we might have needed different @ha relocation values for the offset
    // pointers).
    int MaxDisplacement = 7;
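    // (Roughly: @ha is derived from (addr + 0x8000) >> 16. With only 8-byte
    // alignment, the low 16 bits of that sum are a multiple of 8, i.e. at
    // most 0xfff8, so adding a displacement of at most 7 can never carry into
    // the bits that determine the @ha value.)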
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
      const GlobalValue *GV = GA->getGlobal();
      Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
      MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
    }

    bool UpdateHBase = false;
    SDValue HBase = Base.getOperand(0);

    int Offset = N->getConstantOperandVal(FirstOp);
    if (ReplaceFlags) {
      if (Offset < 0 || Offset > MaxDisplacement) {
        // If we have an addi(toc@l)/addis(toc@ha) pair, and the addis has only
        // one use, then we can do this for any offset; we just need to also
        // update the offset (i.e. the symbol addend) on the addis.
        if (Base.getMachineOpcode() != PPC::ADDItocL)
          continue;

        if (!HBase.isMachineOpcode() ||
            HBase.getMachineOpcode() != PPC::ADDIStocHA8)
          continue;

        if (!Base.hasOneUse() || !HBase.hasOneUse())
          continue;

        SDValue HImmOpnd = HBase.getOperand(1);
        if (HImmOpnd != ImmOpnd)
          continue;

        UpdateHBase = true;
      }
    } else {
      // If we're directly folding the addend from an addi instruction, then:
      //  1. In general, the offset on the memory access must be zero.
      //  2. If the addend is a constant, then it can be combined with a
      //     non-zero offset, but only if the result meets the encoding
      //     requirements.
      if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
        Offset += C->getSExtValue();

        if (RequiresMod4Offset && (Offset % 4) != 0)
          continue;

        if (!isInt<16>(Offset))
          continue;

        ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
                                            ImmOpnd.getValueType());
      } else if (Offset != 0) {
        continue;
      }
    }

    // We found an opportunity. Reverse the operands from the add
    // immediate and substitute them into the load or store. If
    // needed, update the target flags for the immediate operand to
    // reflect the necessary relocation information.
    LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
    LLVM_DEBUG(Base->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nN: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    // If the relocation information isn't already present on the
    // immediate operand, add it now.
    if (ReplaceFlags) {
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
        SDLoc dl(GA);
        const GlobalValue *GV = GA->getGlobal();
        Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
        // We can't perform this optimization for data whose alignment
        // is insufficient for the instruction encoding.
        if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
          LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
          continue;
        }
        ImmOpnd =
            CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
      } else if (ConstantPoolSDNode *CP =
                     dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
        const Constant *C = CP->getConstVal();
        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
                                                Offset, Flags);
      }
    }

    if (FirstOp == 1) // Store
      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
                                       Base.getOperand(0), N->getOperand(3));
    else // Load
      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
                                       N->getOperand(2));

    if (UpdateHBase)
      (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
                                       ImmOpnd);

    // The add-immediate may now be dead, in which case remove it.
    if (Base.getNode()->use_empty())
      CurDAG->RemoveDeadNode(Base.getNode());
  }
}

/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new PPCDAGToDAGISel(TM, OptLevel);
}