//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Peephole optimize the CFG.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
#include <optional>
#include <set>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "simplifycfg"

cl::opt<bool> llvm::RequireAndPreserveDomTree(
    "simplifycfg-require-and-preserve-domtree", cl::Hidden,
    cl::desc(
        "Temporary development switch used to gradually uplift SimplifyCFG "
        "into preserving DomTree."));

// Chosen as 2 so as to be cheap, but still to have enough power to fold
// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
// To catch this, we need to fold a compare and a select, hence '2' being the
// minimum reasonable default.
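//
// Illustrative sketch (not part of the original source): after folding, the
// clamp idiom this threshold is meant to catch looks roughly like the
// following IR, where each folded 2-entry PHI costs one icmp plus one select:
//
//   %lo = icmp slt i32 %x, 0
//   %t  = select i1 %lo, i32 0, i32 %x      ; max(%x, 0)
//   %hi = icmp sgt i32 %t, 255
//   %r  = select i1 %hi, i32 255, i32 %t    ; min(%t, 255)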
static cl::opt<unsigned> PHINodeFoldingThreshold(
    "phi-node-folding-threshold", cl::Hidden, cl::init(2),
    cl::desc(
        "Control the amount of phi node folding to perform (default = 2)"));

static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
    "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
    cl::desc("Control the maximal total instruction cost that we are willing "
             "to speculatively execute to fold a 2-entry PHI node into a "
             "select (default = 4)"));

static cl::opt<bool>
    HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
                cl::desc("Hoist common instructions up to the parent block"));

static cl::opt<unsigned>
    HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
                         cl::init(20),
                         cl::desc("Allow reordering across at most this many "
                                  "instructions when hoisting"));

static cl::opt<bool>
    SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
               cl::desc("Sink common instructions down to the end block"));

static cl::opt<bool> HoistCondStores(
    "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
    cl::desc("Hoist conditional stores if an unconditional store precedes"));

static cl::opt<bool> MergeCondStores(
    "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
    cl::desc("Hoist conditional stores even if an unconditional store does not "
             "precede - hoist multiple conditional stores into a single "
             "predicated store"));

static cl::opt<bool> MergeCondStoresAggressively(
    "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
    cl::desc("When merging conditional stores, do so even if the resultant "
             "basic blocks are unlikely to be if-converted as a result"));

static cl::opt<bool> SpeculateOneExpensiveInst(
    "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
    cl::desc("Allow exactly one expensive instruction to be speculatively "
             "executed"));

static cl::opt<unsigned> MaxSpeculationDepth(
    "max-speculation-depth", cl::Hidden, cl::init(10),
    cl::desc("Limit maximum recursion depth when calculating costs of "
             "speculatively executed instructions"));

static cl::opt<int>
    MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
                      cl::init(10),
                      cl::desc("Max size of a block which is still considered "
                               "small enough to thread through"));

// Two is chosen to allow one negation and a logical combine.
static cl::opt<unsigned>
    BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
                        cl::init(2),
                        cl::desc("Maximum cost of combining conditions when "
                                 "folding branches"));

static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
    "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
    cl::init(2),
    cl::desc("Multiplier to apply to threshold when determining whether or not "
             "to fold branch to common destination when vector operations are "
             "present"));

static cl::opt<bool> EnableMergeCompatibleInvokes(
    "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
    cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));

static cl::opt<unsigned> MaxSwitchCasesPerResult(
    "max-switch-cases-per-result", cl::Hidden, cl::init(16),
    cl::desc("Limit cases to analyze when converting a switch to select"));

STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
          "Number of switch instructions turned into linear mapping");
STATISTIC(NumLookupTables,
          "Number of switch instructions turned into lookup tables");
STATISTIC(
    NumLookupTablesHoles,
    "Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
STATISTIC(NumFoldValueComparisonIntoPredecessors,
          "Number of value comparisons folded into predecessor basic blocks");
STATISTIC(NumFoldBranchToCommonDest,
          "Number of branches folded into predecessor basic block");
STATISTIC(
    NumHoistCommonCode,
    "Number of common instruction 'blocks' hoisted up to the begin block");
STATISTIC(NumHoistCommonInstrs,
          "Number of common instructions hoisted up to the begin block");
STATISTIC(NumSinkCommonCode,
          "Number of common instruction 'blocks' sunk down to the end block");
STATISTIC(NumSinkCommonInstrs,
          "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculatively executed instructions");
STATISTIC(NumInvokes,
          "Number of invokes with empty resume blocks simplified into calls");
STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");

namespace {

// The first field contains the value that the switch produces when a certain
// case group is selected, and the second field is a vector containing the
// cases composing the case group.
using SwitchCaseResultVectorTy =
    SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;

// The first field contains the phi node that generates a result of the switch
// and the second field contains the value generated for a certain case in the
// switch for that PHI.
using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;

/// ValueEqualityComparisonCase - Represents a case of a switch.
struct ValueEqualityComparisonCase {
  ConstantInt *Value;
  BasicBlock *Dest;

  ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
      : Value(Value), Dest(Dest) {}

  bool operator<(ValueEqualityComparisonCase RHS) const {
    // Comparing pointers is ok as we only rely on the order for uniquing.
    return Value < RHS.Value;
  }

  bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
};

class SimplifyCFGOpt {
  const TargetTransformInfo &TTI;
  DomTreeUpdater *DTU;
  const DataLayout &DL;
  ArrayRef<WeakVH> LoopHeaders;
  const SimplifyCFGOptions &Options;
  bool Resimplify;

  Value *isValueEqualityComparison(Instruction *TI);
  BasicBlock *GetValueEqualityComparisonCases(
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
  bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
                                                     BasicBlock *Pred,
                                                     IRBuilder<> &Builder);
  bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
                                                    Instruction *PTI,
                                                    IRBuilder<> &Builder);
  bool FoldValueComparisonIntoPredecessors(Instruction *TI,
                                           IRBuilder<> &Builder);

  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
  bool simplifySingleResume(ResumeInst *RI);
  bool simplifyCommonResume(ResumeInst *RI);
  bool simplifyCleanupReturn(CleanupReturnInst *RI);
  bool simplifyUnreachable(UnreachableInst *UI);
  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyIndirectBr(IndirectBrInst *IBI);
  bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
  bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
  bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);

  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                             IRBuilder<> &Builder);

  bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI,
                             bool EqTermsOnly);
  bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
                              const TargetTransformInfo &TTI);
  bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
                                  uint32_t TrueWeight, uint32_t FalseWeight);
  bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
                                 const DataLayout &DL);
  bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
  bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);

  bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);

public:
  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
                 const SimplifyCFGOptions &Opts)
      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
    assert((!DTU || !DTU->hasPostDomTree()) &&
           "SimplifyCFG is not yet capable of maintaining validity of a "
           "PostDomTree, so don't ask for it.");
  }

  bool simplifyOnce(BasicBlock *BB);
  bool run(BasicBlock *BB);

  // Helper to set Resimplify and return change indication.
  bool requestResimplify() {
    Resimplify = true;
    return true;
  }
};

} // end anonymous namespace

/// Return true if all the PHI nodes in the basic block \p BB
/// receive compatible (identical) incoming values when coming from
/// all of the predecessor blocks that are specified in \p IncomingBlocks.
///
/// Note that if the values aren't exactly identical, but \p EquivalenceSet
/// is provided, and *both* of the values are present in the set,
/// then they are considered equal.
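///
/// Illustrative example (not from the original source): given
///   %p = phi i32 [ %v, %bb0 ], [ %v, %bb1 ]
/// the incoming values from {%bb0, %bb1} are compatible, whereas
///   %p = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
/// is compatible only if both %a and %b are in \p EquivalenceSet.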
static bool IncomingValuesAreCompatible(
    BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
    SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
  assert(IncomingBlocks.size() == 2 &&
         "Only for a pair of incoming blocks at a time!");

  // FIXME: it is okay if one of the incoming values is an `undef` value,
  // iff the other incoming value is guaranteed to be a non-poison value.
  // FIXME: it is okay if one of the incoming values is a `poison` value.
  return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
    Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
    Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
    if (IV0 == IV1)
      return true;
    if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
        EquivalenceSet->contains(IV1))
      return true;
    return false;
  });
}

/// Return true if it is safe to merge these two
/// terminator instructions together.
static bool
SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
                       SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
  if (SI1 == SI2)
    return false; // Can't merge with self!

  // It is not safe to merge these two switch instructions if they have a common
  // successor, and if that successor has a PHI node, and if *that* PHI node has
  // conflicting incoming values from the two switch blocks.
  BasicBlock *SI1BB = SI1->getParent();
  BasicBlock *SI2BB = SI2->getParent();

  SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
  bool Fail = false;
  for (BasicBlock *Succ : successors(SI2BB)) {
    if (!SI1Succs.count(Succ))
      continue;
    if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
      continue;
    Fail = true;
    if (FailBlocks)
      FailBlocks->insert(Succ);
    else
      break;
  }

  return !Fail;
}

/// Update PHI nodes in Succ to indicate that there will now be entries in it
/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
/// will be the same as those coming in from ExistPred, an existing predecessor
/// of Succ.
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
                                  BasicBlock *ExistPred,
                                  MemorySSAUpdater *MSSAU = nullptr) {
  for (PHINode &PN : Succ->phis())
    PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
  if (MSSAU)
    if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
      MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
}

/// Compute an abstract "cost" of speculating the given instruction,
/// which is assumed to be safe to speculate. TCC_Free means cheap,
/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
/// expensive.
static InstructionCost computeSpeculationCost(const User *I,
                                              const TargetTransformInfo &TTI) {
  assert((!isa<Instruction>(I) ||
          isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
         "Instruction is not safe to speculatively execute!");
  return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
}

/// If we have a merge point of an "if condition" as accepted above,
/// return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
/// which works well enough for us.
///
/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
/// see if V (which must be an instruction) and its recursive operands
/// that do not dominate BB have a combined cost lower than Budget and
/// are non-trapping. If both are true, the instruction is inserted into the
/// set and true is returned.
///
/// The cost for most non-trapping instructions is defined as 1 except for
/// Select whose cost is 2.
///
/// After this function returns, Cost is increased by the cost of
/// V plus its non-dominating operands. If that cost is greater than
/// Budget, false is returned and Cost is undefined.
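///
/// Illustrative CFG (not from the original source): in the triangle
///   entry:  br i1 %c, label %then, label %merge
///   then:   %v = add i32 %x, 1
///           br label %merge
///   merge:  %p = phi i32 [ %v, %then ], [ %x, %entry ]
/// %x trivially dominates %merge, while %v only "dominates" it for our
/// purposes if speculating the add (cost 1) still fits within Budget.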
static bool dominatesMergePoint(Value *V, BasicBlock *BB,
                                SmallPtrSetImpl<Instruction *> &AggressiveInsts,
                                InstructionCost &Cost,
                                InstructionCost Budget,
                                const TargetTransformInfo &TTI,
                                unsigned Depth = 0) {
  // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
  // so limit the recursion depth.
  // TODO: While this recursion limit does prevent pathological behavior, it
  // would be better to track visited instructions to avoid cycles.
  if (Depth == MaxSpeculationDepth)
    return false;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) {
    // Non-instructions dominate all instructions and can be executed
    // unconditionally.
    return true;
  }
  BasicBlock *PBB = I->getParent();

  // We don't want to allow weird loops that might have the "if condition" in
  // the bottom of this block.
  if (PBB == BB)
    return false;

  // If this instruction is defined in a block that contains an unconditional
  // branch to BB, then it must be in the 'conditional' part of the "if
  // statement". If not, it definitely dominates the region.
  BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
  if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
    return true;

  // If we have seen this instruction before, don't count it again.
  if (AggressiveInsts.count(I))
    return true;

  // Okay, it looks like the instruction IS in the "condition". Check to
  // see if it's a cheap instruction to unconditionally compute, and if it
  // only uses stuff defined outside of the condition. If so, hoist it out.
  if (!isSafeToSpeculativelyExecute(I))
    return false;

  Cost += computeSpeculationCost(I, TTI);

  // Allow exactly one instruction to be speculated regardless of its cost
  // (as long as it is safe to do so).
  // This is intended to flatten the CFG even if the instruction is a division
  // or other expensive operation. The speculation of an expensive instruction
  // is expected to be undone in CodeGenPrepare if the speculation has not
  // enabled further IR optimizations.
  if (Cost > Budget &&
      (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
       !Cost.isValid()))
    return false;

  // Okay, we can only really hoist these out if their operands do
  // not take us over the cost threshold.
  for (Use &Op : I->operands())
    if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
                             Depth + 1))
      return false;
  // Okay, it's safe to do this!  Remember this instruction.
  AggressiveInsts.insert(I);
  return true;
}

/// Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
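///
/// For illustration (not part of the original source), both of these pointer
/// constants yield a pointer-sized ConstantInt:
///   ptr null                  --> i64 0
///   inttoptr (i64 42 to ptr)  --> i64 42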
static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
  // Normal constant int.
  ConstantInt *CI = dyn_cast<ConstantInt>(V);
  if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
      DL.isNonIntegralPointerType(V->getType()))
    return CI;

  // This is some kind of pointer constant. Turn it into a pointer-sized
  // ConstantInt if possible.
  IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));

  // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
  if (isa<ConstantPointerNull>(V))
    return ConstantInt::get(PtrTy, 0);

  // IntToPtr const int.
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::IntToPtr)
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
        // The constant is very likely to have the right type already.
        if (CI->getType() == PtrTy)
          return CI;
        else
          return cast<ConstantInt>(
              ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
      }
  return nullptr;
}

namespace {

/// Given a chain of or (||) or and (&&) comparisons of a value against a
/// constant, this will try to recover the information required for a switch
/// structure.
/// It will depth-first traverse the chain of comparisons, looking for patterns
/// like %a == 12 or %a < 4, and combine them to produce a set of integers
/// representing the different cases for the switch.
/// Note that if the chain is composed of '||' it will build the set of
/// elements that match the comparisons (i.e. any of these values satisfies
/// the chain), while for a chain of '&&' it will build the set of elements
/// that make the test fail.
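///
/// Illustrative example (not from the original source): gathering
///   %c1 = icmp eq i32 %a, 12
///   %c2 = icmp eq i32 %a, 13
///   %or = or i1 %c1, %c2
/// yields CompValue = %a, Vals = {12, 13}, UsedICmps = 2, and no Extra clause.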
struct ConstantComparesGatherer {
  const DataLayout &DL;

  /// Value found for the switch comparison
  Value *CompValue = nullptr;

  /// Extra clause to be checked before the switch
  Value *Extra = nullptr;

  /// Set of integers to match in switch
  SmallVector<ConstantInt *, 8> Vals;

  /// Number of comparisons matched in the and/or chain
  unsigned UsedICmps = 0;

  /// Construct and compute the result for the comparison instruction Cond
  ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
    gather(Cond);
  }

  ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
  ConstantComparesGatherer &
  operator=(const ConstantComparesGatherer &) = delete;

private:
  /// Try to set the current value used for the comparison; it succeeds only if
  /// it wasn't set before or if the new value is the same as the old one
  bool setValueOnce(Value *NewVal) {
    if (CompValue && CompValue != NewVal)
      return false;
    CompValue = NewVal;
    return (CompValue != nullptr);
  }

  /// Try to match Instruction "I" as a comparison against a constant and
  /// populate the array Vals with the set of values that match (or do not
  /// match depending on isEQ).
  /// Return false on failure. On success, the Value the comparison matched
  /// against is placed in CompValue.
  /// If CompValue is already set, the function is expected to fail if a match
  /// is found but the value compared to is different.
  bool matchInstruction(Instruction *I, bool isEQ) {
    // If this is an icmp against a constant, handle this as one of the cases.
    ICmpInst *ICI;
    ConstantInt *C;
    if (!((ICI = dyn_cast<ICmpInst>(I)) &&
          (C = GetConstantInt(I->getOperand(1), DL)))) {
      return false;
    }

    Value *RHSVal;
    const APInt *RHSC;

    // Pattern match a special case
    // (x & ~2^z) == y --> x == y || x == y|2^z
    // This undoes a transformation done by instcombine to fuse 2 compares.
    if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
      // It's a little bit hard to see why the following transformations are
      // correct. Here is a CVC3 program to verify them for 64-bit values:

      /*
        ONE  : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
        x    : BITVECTOR(64);
        y    : BITVECTOR(64);
        z    : BITVECTOR(64);
        mask : BITVECTOR(64) = BVSHL(ONE, z);
        QUERY( (y & ~mask = y) =>
               ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
        );
        QUERY( (y | mask = y) =>
               ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
        );
      */

      // Please note that each pattern must be a dual implication (<--> or
      // iff). One directional implication can create spurious matches. If the
      // implication is only one-way, an unsatisfiable condition on the left
      // side can imply a satisfiable condition on the right side. Dual
      // implication ensures that satisfiable conditions are transformed to
      // other satisfiable conditions and unsatisfiable conditions are
      // transformed to other unsatisfiable conditions.

      // Here is a concrete example of an unsatisfiable condition on the left
      // implying a satisfiable condition on the right:
      //
      //   mask = (1 << z)
      //   (x & ~mask) == y  --> (x == y || x == (y | mask))
      //
      // Substituting y = 3, z = 0 yields:
      //   (x & -2) == 3 --> (x == 3 || x == 2)
      // Pattern match a special case:
      /*
        QUERY( (y & ~mask = y) =>
               ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
        );
      */
      if (match(ICI->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
        APInt Mask = ~*RHSC;
        if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
          // If we already have a value for the switch, it has to match!
          if (!setValueOnce(RHSVal))
            return false;

          Vals.push_back(C);
          Vals.push_back(
              ConstantInt::get(C->getContext(), C->getValue() | Mask));
          UsedICmps++;
          return true;
        }
      }

      // Pattern match a special case:
      /*
        QUERY( (y | mask = y) =>
               ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
        );
      */
      if (match(ICI->getOperand(0), m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
        APInt Mask = *RHSC;
        if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
          // If we already have a value for the switch, it has to match!
          if (!setValueOnce(RHSVal))
            return false;

          Vals.push_back(C);
          Vals.push_back(ConstantInt::get(C->getContext(),
                                          C->getValue() & ~Mask));
          UsedICmps++;
          return true;
        }
      }

      // If we already have a value for the switch, it has to match!
      if (!setValueOnce(ICI->getOperand(0)))
        return false;

      UsedICmps++;
      Vals.push_back(C);
      return ICI->getOperand(0);
    }

    // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
    ConstantRange Span =
        ConstantRange::makeExactICmpRegion(ICI->getPredicate(), C->getValue());

    // Shift the range if the compare is fed by an add. This is the range
    // compare idiom as emitted by instcombine.
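    // Illustrative example (not part of the original source): instcombine
    // emits "x in [5, 8)" as "(x + -5) ult 3"; matching the add and applying
    // Span.subtract(-5) recovers the range [5, 8) for x itself.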
    Value *CandidateVal = I->getOperand(0);
    if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
      Span = Span.subtract(*RHSC);
      CandidateVal = RHSVal;
    }

    // If this is an and/!= check, then we are looking to build the set of
    // values that *don't* pass the and chain. I.e. to turn "x ugt 2" into
    // x != 0 && x != 1.
    if (!isEQ)
      Span = Span.inverse();

    // If there are a ton of values, we don't want to make a ginormous switch.
    if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
      return false;
    }

    // If we already have a value for the switch, it has to match!
    if (!setValueOnce(CandidateVal))
      return false;

    // Add all values from the range to the set.
    for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
      Vals.push_back(ConstantInt::get(I->getContext(), Tmp));

    UsedICmps++;
    return true;
  }
  /// Given a potentially 'or'd or 'and'd together collection of icmp
  /// eq/ne/lt/gt instructions that compare a value against a constant, extract
  /// the value being compared, and stick the constants into the Vals vector.
  /// One "Extra" case is allowed to differ from the others.
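  /// For example, gathering "x == 1 || x == 4 || x == 7" sets CompValue to x
  /// and fills Vals with {1, 4, 7}; a single leaf that doesn't fit the
  /// pattern is stashed in Extra instead of aborting the match.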
  void gather(Value *V) {
    bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));

    // Keep a stack (SmallVector for efficiency) for depth-first traversal.
    SmallVector<Value *, 8> DFT;
    SmallPtrSet<Value *, 8> Visited;

    // Initialize.
    Visited.insert(V);
    DFT.push_back(V);

    while (!DFT.empty()) {
      V = DFT.pop_back_val();

      if (Instruction *I = dyn_cast<Instruction>(V)) {
        // If it is a || (or && depending on isEQ), process the operands.
        Value *Op0, *Op1;
        if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
                 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
          if (Visited.insert(Op1).second)
            DFT.push_back(Op1);
          if (Visited.insert(Op0).second)
            DFT.push_back(Op0);
          continue;
        }

        // Try to match the current instruction.
        if (matchInstruction(I, isEQ))
          // The match succeeded; continue the loop.
          continue;
      }

      // One element of the sequence of || (or &&) could not be matched as a
      // comparison against the same value as the others.
      // We allow only one "Extra" case to be checked before the switch.
      if (!Extra) {
        Extra = V;
        continue;
      }

      // Failed to parse a proper sequence; abort now.
      CompValue = nullptr;
      break;
    }
  }
};

} // end anonymous namespace
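/// Erase the specified terminator, then recursively delete the computation of
/// its condition (or address, for an indirectbr) if that computation has
/// become trivially dead.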
static void EraseTerminatorAndDCECond(Instruction *TI,
                                      MemorySSAUpdater *MSSAU = nullptr) {
  Instruction *Cond = nullptr;
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
    Cond = dyn_cast<Instruction>(SI->getCondition());
  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    if (BI->isConditional())
      Cond = dyn_cast<Instruction>(BI->getCondition());
  } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
    Cond = dyn_cast<Instruction>(IBI->getAddress());
  }

  TI->eraseFromParent();
  if (Cond)
    RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
}
/// Return the comparison value if the specified terminator checks whether a
/// value is equal to a constant integer; otherwise return null.
Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
  Value *CV = nullptr;
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
    // Do not permit merging of large switch instructions into their
    // predecessors unless there is only one predecessor.
    if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
      CV = SI->getCondition();
  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    if (BI->isConditional() && BI->getCondition()->hasOneUse())
      if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
        if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
          CV = ICI->getOperand(0);
      }

  // Unwrap any lossless ptrtoint cast.
  if (CV) {
    if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
      Value *Ptr = PTII->getPointerOperand();
      if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
        CV = Ptr;
    }
  }
  return CV;
}
/// Given a value comparison instruction,
/// decode all of the 'cases' that it represents and return the 'default'
/// block.
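/// For example, "br (icmp eq X, 4), BB1, BB2" decodes into the single case
/// (4 -> BB1), with BB2 returned as the 'default' block.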
BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
    Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
    Cases.reserve(SI->getNumCases());
    for (auto Case : SI->cases())
      Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
                                                  Case.getCaseSuccessor()));
    return SI->getDefaultDest();
  }

  BranchInst *BI = cast<BranchInst>(TI);
  ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
  BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
  Cases.push_back(ValueEqualityComparisonCase(
      GetConstantInt(ICI->getOperand(1), DL), Succ));
  return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
}
/// Given a vector of bb/value pairs, remove any entries
/// in the list that match the specified block.
static void
EliminateBlockCases(BasicBlock *BB,
                    std::vector<ValueEqualityComparisonCase> &Cases) {
  llvm::erase_value(Cases, BB);
}

/// Return true if there are any keys in C1 that exist in C2 as well.
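/// For example, {1, 3} and {2, 3} overlap on the key 3, whereas {1, 3} and
/// {2, 4} do not.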
static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
                          std::vector<ValueEqualityComparisonCase> &C2) {
  std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;

  // Make V1 be smaller than V2.
  if (V1->size() > V2->size())
    std::swap(V1, V2);

  if (V1->empty())
    return false;
  if (V1->size() == 1) {
    // Just scan V2.
    ConstantInt *TheVal = (*V1)[0].Value;
    for (const ValueEqualityComparisonCase &VECC : *V2)
      if (TheVal == VECC.Value)
        return true;
  }

  // Otherwise, just sort both lists and compare element by element.
  array_pod_sort(V1->begin(), V1->end());
  array_pod_sort(V2->begin(), V2->end());
  unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
  while (i1 != e1 && i2 != e2) {
    if ((*V1)[i1].Value == (*V2)[i2].Value)
      return true;
    if ((*V1)[i1].Value < (*V2)[i2].Value)
      ++i1;
    else
      ++i2;
  }

  return false;
}
// Set branch weights on SwitchInst. This sets the metadata if there is at
// least one non-zero weight.
static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) {
  // Check that there is at least one non-zero weight. Otherwise, pass
  // nullptr to setMetadata which will erase the existing metadata.
  MDNode *N = nullptr;
  if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
    N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
  SI->setMetadata(LLVMContext::MD_prof, N);
}

// Similar to the above, but for branch and select instructions that take
// exactly 2 weights.
static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
                             uint32_t FalseWeight) {
  assert(isa<BranchInst>(I) || isa<SelectInst>(I));
  // Check that there is at least one non-zero weight. Otherwise, pass
  // nullptr to setMetadata which will erase the existing metadata.
  MDNode *N = nullptr;
  if (TrueWeight || FalseWeight)
    N = MDBuilder(I->getParent()->getContext())
            .createBranchWeights(TrueWeight, FalseWeight);
  I->setMetadata(LLVMContext::MD_prof, N);
}
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
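/// For example, if Pred ends in "switch (X) [case 4 -> this block]" and TI is
/// "br (X == 4)", then X must be 4 whenever this block executes, so TI can be
/// folded to an unconditional branch to its true successor.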
bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Comparing different values.
  // TODO: Preserve branch weight metadata, similarly to how
  // FoldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
  EliminateBlockCases(PredDef, PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
  EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!ValuesOverlap(PredCases, ThisCases))
      return false;
    if (isa<BranchInst>(TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to an
      // unconditional branch.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      EraseTerminatorAndDCECond(TI);

      if (DTU)
        DTU->applyUpdates(
            {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }
    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      DeadCases.insert(PredCases[i].Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(i->getCaseValue())) {
        Successor->removePredecessor(PredDef);
        SI.removeCase(i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    if (DTU) {
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back({DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }
  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
    if (PredCases[i].Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = PredCases[i].Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
    if (ThisCases[i].Value == TIV) {
      TheRealDest = ThisCases[i].Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Succ);
      Succ->removePredecessor(TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  EraseTerminatorAndDCECond(TI);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
namespace {

/// This class implements a stable ordering of constant
/// integers that does not depend on their address. This is important for
/// applications that sort ConstantInt's to ensure uniqueness.
struct ConstantIntOrdering {
  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
    return LHS->getValue().ult(RHS->getValue());
  }
};

} // end anonymous namespace
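/// Sort predicate for array_pod_sort. Note that it orders the constants in
/// *descending* numeric order.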
static int ConstantIntSortPredicate(ConstantInt *const *P1,
                                    ConstantInt *const *P2) {
  const ConstantInt *LHS = *P1;
  const ConstantInt *RHS = *P2;
  if (LHS == RHS)
    return 0;
  return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
}
/// Get the weights of a given terminator; the default weight is placed at the
/// front of the vector. If TI is a conditional eq branch, the branch-weight
/// metadata entries must be swapped to achieve that.
static void GetBranchWeights(Instruction *TI,
                             SmallVectorImpl<uint64_t> &Weights) {
  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
  assert(MD);
  for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
    ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
    Weights.push_back(CI->getValue().getZExtValue());
  }

  // If TI is a conditional eq, the default case is the false case,
  // and the corresponding branch-weight data is at index 2. We swap the
  // default weight to be the first entry.
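  // For example, for "br (icmp eq X, 0), T, F" with !prof weights {T: 10,
  // F: 90}, the loop above reads [10, 90] and the swap below turns that into
  // [90, 10], putting the default (false-case) weight first.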
  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
    assert(Weights.size() == 2);
    ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      std::swap(Weights.front(), Weights.back());
  }
}

/// Keep halving the weights until all can fit in uint32_t.
static void FitWeights(MutableArrayRef<uint64_t> Weights) {
  uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
  if (Max > UINT_MAX) {
    unsigned Offset = 32 - countLeadingZeros(Max);
    for (uint64_t &I : Weights)
      I >>= Offset;
  }
}
static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc());
    }

    RemapInstruction(NewBonusInst, VMap,
                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    VMap[&BonusInst] = NewBonusInst;

    // If we moved a load, we can no longer claim any knowledge about
    // its potential value. The previous information might have been valid
    // only given the branch precondition.
    // For an analogous reason, we must also drop all the metadata whose
    // semantics we don't understand. We *can* preserve !annotation, because
    // it is tied to the instruction itself, not the value or position.
    // Similarly strip attributes on call parameters that may cause UB in
    // the location the call is moved to.
    NewBonusInst->dropUndefImplyingAttrsAndUnknownMetadata(
        LLVMContext::MD_annotation);

    NewBonusInst->insertInto(PredBlock, PTI->getIterator());
    NewBonusInst->takeName(&BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(BonusInst.uses())) {
      auto *UI = cast<Instruction>(U.getUser());
      auto *PN = dyn_cast<PHINode>(UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }

      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.

      // The only other alternative is a use coming from the predecessor
      // block - here we should refer to the cloned bonus instruction.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }
}
bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  SmallVector<DominatorTree::UpdateType, 32> Updates;

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  SmallMapVector<BasicBlock *, int, 8> NewSuccessors;

  // Update the branch weight metadata along the way.
  SmallVector<uint64_t, 8> Weights;
  bool PredHasWeights = hasBranchWeightMD(*PTI);
  bool SuccHasWeights = hasBranchWeightMD(*TI);

  if (PredHasWeights) {
    GetBranchWeights(PTI, Weights);

    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights.
    Weights.assign(1 + PredCases.size(), 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    GetBranchWeights(TI, SuccWeights);

    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(1 + BBCases.size(), 1);
  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        std::swap(PredCases[i], PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PTI, only the edges
    // in TI that occur in PTI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        std::swap(PredCases[i], PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (PTIHandled.count(BBCases[i].Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(WeightsForHandled[BBCases[i].Value]);
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        PTIHandled.erase(BBCases[i].Value); // This constant is taken care of.
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(WeightsForHandled[I]);
      PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }
  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
    Updates.reserve(Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    for (auto I : seq(0, NewSuccessor.second)) {
      (void)I;
      AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
    }
    if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
      Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    CV =
        Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(V.Value, V.Dest);

  if (PredHasWeights || SuccHasWeights) {
    // Halve the weights if any of them cannot fit in a uint32_t.
    FitWeights(Weights);

    SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());

    setBranchWeights(NewSI, MDWeights);
  }
  EraseTerminatorAndDCECond(PTI);

  // Okay, last check. If BB is still a successor of NewSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either dead code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
        BranchInst::Create(InfLoopBlock, InfLoopBlock);
        if (DTU)
          Updates.push_back(
              {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(i, InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back({DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
/// The specified terminator is a value equality comparison instruction
/// (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value
/// comparisons on the same value. If so, and if safe to do so, fold them
/// together.
bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
                                                         IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  Value *CV = isValueEqualityComparison(TI); // CondVal
  assert(CV && "Not a comparison?");

  bool Changed = false;

  SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
  while (!Preds.empty()) {
    BasicBlock *Pred = Preds.pop_back_val();
    Instruction *PTI = Pred->getTerminator();

    // Don't try to fold into itself.
    if (Pred == BB)
      continue;

    // See if the predecessor is a comparison with the same value.
    Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
    if (PCV != CV)
      continue;

    SmallSetVector<BasicBlock *, 4> FailBlocks;
    if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
      for (auto *Succ : FailBlocks) {
        if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
          return false;
      }
    }

    PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
    Changed = true;
  }
  return Changed;
}
// If we would need to insert a select that uses the value of this invoke
// (comments in HoistThenElseCodeToIf explain why we would need to do this), we
// can't hoist the invoke, as there is nowhere to put the select in this case.
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
                                Instruction *I1, Instruction *I2) {
  for (BasicBlock *Succ : successors(BB1)) {
    for (const PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
      if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
        return false;
      }
    }
  }
  return true;
}

// Get interesting characteristics of instructions that `HoistThenElseCodeToIf`
// didn't hoist. They restrict what kind of instructions can be reordered
// across.
enum SkipFlags {
  SkipReadMem = 1,
  SkipSideEffect = 2,
  SkipImplicitControlFlow = 4
};
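// For example, once a load has been skipped (SkipReadMem), no store may be
// hoisted across it; once a store has been skipped (SkipSideEffect), neither
// loads nor other side-effecting instructions may be hoisted across it.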
static unsigned skippedInstrFlags(Instruction *I) {
  unsigned Flags = 0;
  if (I->mayReadFromMemory())
    Flags |= SkipReadMem;
  // We can't arbitrarily move around allocas, e.g. moving allocas (especially
  // inalloca) across stacksave/stackrestore boundaries.
  if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
    Flags |= SkipSideEffect;
  if (!isGuaranteedToTransferExecutionToSuccessor(I))
    Flags |= SkipImplicitControlFlow;
  return Flags;
}

// Returns true if it is safe to reorder an instruction across preceding
// instructions in a basic block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
  // Don't reorder a store over a load.
  if ((Flags & SkipReadMem) && I->mayWriteToMemory())
    return false;

  // If we have seen an instruction with side effects, it's unsafe to reorder
  // an instruction which reads memory or itself has side effects.
  if ((Flags & SkipSideEffect) &&
      (I->mayReadFromMemory() || I->mayHaveSideEffects()))
    return false;

  // Reordering across an instruction which does not necessarily transfer
  // control to the next instruction is speculation.
  if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I))
    return false;

  // Hoisting of llvm.deoptimize is only legal together with the next return
  // instruction, which this pass is not always able to do.
  if (auto *CB = dyn_cast<CallBase>(I))
    if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
      return false;

  // It's also unsafe/illegal to hoist an instruction above its instruction
  // operands.
  BasicBlock *BB = I->getParent();
  for (Value *Op : I->operands()) {
    if (auto *J = dyn_cast<Instruction>(Op))
      if (J->getParent() == BB)
        return false;
  }
  return true;
}
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I,
                                          bool PtrValueMayBeModified = false);

/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
/// in the two blocks up into the branch block. The caller of this function
/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given,
/// only perform hoisting in case both blocks only contain a terminator. In
/// that case, only the original BI will be replaced and selects for PHIs are
/// added.
bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
                                           const TargetTransformInfo &TTI,
                                           bool EqTermsOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
  BasicBlock *BB2 = BI->getSuccessor(1); // The false destination.

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the
  // block by its address.
  if (BB1->hasAddressTaken() || BB2->hasAddressTaken())
    return false;

  BasicBlock::iterator BB1_Itr = BB1->begin();
  BasicBlock::iterator BB2_Itr = BB2->begin();

  Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
  // Skip debug info if it is not identical.
  DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
  DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
  if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
    while (isa<DbgInfoIntrinsic>(I1))
      I1 = &*BB1_Itr++;
    while (isa<DbgInfoIntrinsic>(I2))
      I2 = &*BB2_Itr++;
  }
  if (isa<PHINode>(I1))
    return false;

  BasicBlock *BIParent = BI->getParent();

  bool Changed = false;

  auto _ = make_scope_exit([&]() {
    if (Changed)
      ++NumHoistCommonCode;
  });
  // Check if only hoisting terminators is allowed. This does not add new
  // instructions to the hoist location.
  if (EqTermsOnly) {
    // Skip any debug intrinsics, as they are free to hoist.
    auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator());
    auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator());
    if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg))
      return false;
    if (!I1NonDbg->isTerminator())
      return false;
    // Now we know that we only need to hoist debug intrinsics and the
    // terminator. Let the loop below handle those 2 cases.
  }

  // Count how many instructions were not hoisted so far. There's a limit on
  // how many instructions we skip, serving as a compilation time control as
  // well as preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;

  // Record any skipped instructions that may read memory, write memory or
  // have side effects, or have implicit control flow.
  unsigned SkipFlagsBB1 = 0;
  unsigned SkipFlagsBB2 = 0;
  for (;;) {
    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (I1->isTerminator() || I2->isTerminator()) {
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !I1->isIdenticalToWhenDefined(I2))
        return Changed;
      goto HoistTerminator;
    }

    if (I1->isIdenticalToWhenDefined(I2)) {
      // Even if the instructions are identical, it may not be safe to hoist
      // them if we have skipped over instructions with side effects or their
      // operands weren't hoisted.
      if (!isSafeToHoistInstr(I1, SkipFlagsBB1) ||
          !isSafeToHoistInstr(I2, SkipFlagsBB2))
        return Changed;

      // If we're going to hoist a call, make sure that the two instructions
      // we're commoning/hoisting are both marked with musttail, or neither of
      // them is marked as such. Otherwise, we might end up in a situation where
      // we hoist from a block where the terminator is a `ret` to a block where
      // the terminator is a `br`, and `musttail` calls expect to be followed by
      // a return.
      auto *C1 = dyn_cast<CallInst>(I1);
      auto *C2 = dyn_cast<CallInst>(I2);
      if (C1 && C2)
        if (C1->isMustTailCall() != C2->isMustTailCall())
          return Changed;

      if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
        return Changed;

      // If any of the two call sites has nomerge attribute, stop hoisting.
      if (const auto *CB1 = dyn_cast<CallBase>(I1))
        if (CB1->cannotMerge())
          return Changed;
      if (const auto *CB2 = dyn_cast<CallBase>(I2))
        if (CB2->cannotMerge())
          return Changed;

      if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
        assert(isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
        // The debug location is an integral part of a debug info intrinsic
        // and can't be separated from it or replaced. Instead of attempting
        // to merge locations, simply hoist both copies of the intrinsic.
        BIParent->splice(BI->getIterator(), BB1, I1->getIterator());
        BIParent->splice(BI->getIterator(), BB2, I2->getIterator());
      } else {
        // For a normal instruction, we just move one to right before the
        // branch, then replace all uses of the other with the first. Finally,
        // we remove the now redundant second instruction.
        BIParent->splice(BI->getIterator(), BB1, I1->getIterator());
        if (!I2->use_empty())
          I2->replaceAllUsesWith(I1);
        I1->andIRFlags(I2);
        unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
                               LLVMContext::MD_range,
                               LLVMContext::MD_fpmath,
                               LLVMContext::MD_invariant_load,
                               LLVMContext::MD_nonnull,
                               LLVMContext::MD_invariant_group,
                               LLVMContext::MD_align,
                               LLVMContext::MD_dereferenceable,
                               LLVMContext::MD_dereferenceable_or_null,
                               LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group,
                               LLVMContext::MD_preserve_access_index};
        combineMetadata(I1, I2, KnownIDs, true);

        // I1 and I2 are being combined into a single instruction. Its debug
        // location is the merged locations of the original instructions.
        I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());

        I2->eraseFromParent();
      }
      Changed = true;
      ++NumHoistCommonInstrs;
    } else {
      if (NumSkipped >= HoistCommonSkipLimit)
        return Changed;
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      SkipFlagsBB1 |= skippedInstrFlags(I1);
      SkipFlagsBB2 |= skippedInstrFlags(I2);
      ++NumSkipped;
    }

    I1 = &*BB1_Itr++;
    I2 = &*BB2_Itr++;
    // Skip debug info if it is not identical.
    DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
    DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
    if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
      while (isa<DbgInfoIntrinsic>(I1))
        I1 = &*BB1_Itr++;
      while (isa<DbgInfoIntrinsic>(I2))
        I2 = &*BB2_Itr++;
    }
  }

  return Changed;
HoistTerminator:
  // It may not be possible to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
    return Changed;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(I1))
    return Changed;

  for (BasicBlock *Succ : successors(BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
      if (BB1V == BB2V)
        continue;

      // Check for passingValueIsAlwaysUndefined here because we would rather
      // eliminate undefined control flow than convert it to a select.
      if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
          passingValueIsAlwaysUndefined(BB2V, &PN))
        return Changed;
    }
  }

  // Okay, it is safe to hoist the terminator.
  Instruction *NT = I1->clone();
  NT->insertInto(BIParent, BI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    I1->replaceAllUsesWith(NT);
    I2->replaceAllUsesWith(NT);
    NT->takeName(I1);
  }
  Changed = true;
  ++NumHoistCommonInstrs;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // Hoisting one of the terminators from our successor is a great thing.
  // Unfortunately, the successors of the if/else blocks may have PHI nodes in
  // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
  // nodes, so we insert select instructions to compute the final result.
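  // For example, given an incoming PHI
  //   %phi = phi i32 [ %a, %BB1 ], [ %b, %BB2 ]
  // we emit
  //   %sel = select i1 %cond, i32 %a, i32 %b
  // before the hoisted terminator (where %cond is BI's condition) and rewrite
  // both incoming values to use %sel.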
  std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
  for (BasicBlock *Succ : successors(BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB1);
      Value *BB2V = PN.getIncomingValueForBlock(BB2);
      if (BB1V == BB2V)
        continue;

      // These values do not agree. Insert a select instruction before NT
      // that determines the right value.
      SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
      if (!SI) {
        // Propagate fast-math-flags from phi node to its replacement select.
        IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
        if (isa<FPMathOperator>(PN))
          Builder.setFastMathFlags(PN.getFastMathFlags());

        SI = cast<SelectInst>(
            Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
                                 BB1V->getName() + "." + BB2V->getName(), BI));
      }

      // Make the PHI node use the select for all incoming values for BB1/BB2.
      for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
        if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
          PN.setIncomingValue(i, SI);
    }
  }

  SmallVector<DominatorTree::UpdateType, 4> Updates;

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB1)) {
    AddPredecessorToBlock(Succ, BIParent, BB1);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BIParent, Succ});
  }

  if (DTU)
    for (BasicBlock *Succ : successors(BI))
      Updates.push_back({DominatorTree::Delete, BIParent, Succ});

  EraseTerminatorAndDCECond(BI);
  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
// Check lifetime markers.
static bool isLifeTimeMarker(const Instruction *I) {
  if (auto II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      break;
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      return true;
    }
  }
  return false;
}

// TODO: Refine this. This should avoid cases like turning constant memcpy
// sizes into variables.
static bool replacingOperandWithVariableIsCheap(const Instruction *I,
                                                int OpIdx) {
  return !isa<IntrinsicInst>(I);
}
// All instructions in Insts belong to different blocks that all
// unconditionally branch to a common successor. Analyze each instruction and
// return true if it would be possible to sink them into their successor,
// creating one common instruction instead. For every value that would be
// required to be provided by a PHI node (because an operand varies in each
// input block), add to PHIOperands.
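// For example, sinking "store i32 0, ptr %p" and "store i32 1, ptr %p" from
// two predecessors is possible, but requires PHI-merging the stored values,
// so both constants are recorded in PHIOperands.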
static bool canSinkInstructions(
    ArrayRef<Instruction *> Insts,
    DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // exactly zero or one use, and we check later that use is by a single,
  // common PHI instruction in the successor.
  bool HasUse = !Insts.front()->user_empty();
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(I))
      if (C->isInlineAsm() || C->cannotMerge())
        return false;

    // Each instruction must have zero or one use.
    if (HasUse && !I->hasOneUse())
      return false;
    if (!HasUse && !I->user_empty())
      return false;
  }

  const Instruction *I0 = Insts.front();
  for (auto *I : Insts)
    if (!I->isSameOperationAs(I0))
      return false;
  // All instructions in Insts are known to be the same opcode. If they have a
  // use, check that the only user is a PHI or in the same block as the
  // instruction, because if a user is in the same block as an instruction
  // we're contemplating sinking, it must already be determined to be sinkable.
  if (HasUse) {
    auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
    auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
    if (!all_of(Insts, [&PNUse, &Succ](const Instruction *I) -> bool {
          auto *U = cast<Instruction>(*I->user_begin());
          return (PNUse && PNUse->getParent() == Succ &&
                  PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
                 U->getParent() == I->getParent();
        }))
      return false;
  }

  // Because SROA can't handle speculating stores of selects, try not to sink
  // loads, stores or lifetime markers of allocas when we'd have to create a
  // PHI for the address operand. Also, because it is likely that loads or
  // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
  // them.
  // This can cause code churn which can have unintended consequences down
  // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
  // FIXME: This is a workaround for a deficiency in SROA - see
  // https://llvm.org/bugs/show_bug.cgi?id=30188
  if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
        return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
      }))
    return false;
  if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
        return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
      }))
    return false;
  if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
        return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
      }))
    return false;

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(OI);
    if (Op->getType()->isTokenTy())
      // Don't touch any operand of token type.
      return false;

    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(OI) == I0->getOperand(OI);
    };
    if (!all_of(Insts, SameAsI0)) {
      if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
          !canReplaceOperandWithVariable(I0, OI))
        // We can't create a PHI from this operand.
        return false;
      for (auto *I : Insts)
        PHIOperands[I].push_back(I->getOperand(OI));
    }
  }
  return true;
}
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
static bool sinkLastInstruction(ArrayRef<BasicBlock *> Blocks) {
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  SmallVector<Instruction *, 4> Insts;
  for (auto *BB : Blocks) {
    Instruction *I = BB->getTerminator();
    do {
      I = I->getPrevNode();
    } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
    if (!isa<DbgInfoIntrinsic>(I))
      Insts.push_back(I);
  }

  // The only checking we need to do now is that all users of all instructions
  // are the same PHI node. canSinkInstructions should have checked this but
  // it is slightly over-aggressive - it gets confused by commutative
  // instructions so double-check it here.
  Instruction *I0 = Insts.front();
  if (!I0->user_empty()) {
    auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
    if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
          auto *U = cast<Instruction>(*I->user_begin());
          return U == PNUse;
        }))
      return false;
  }
  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
                               Op->getName() + ".sink", &BBEnd->front());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged
      // locations of all the commoned instructions. We start with the original
      // location of the "common" instruction and iteratively merge each
      // location in the loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, since an N-way merge for a CallInst is rare, we use the
      // simplified API instead of the complex one.
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  if (!I0->user_empty()) {
    // canSinkInstructions checked that all instructions were used by
    // one and only one PHI node. Find that now, RAUW it to our common
    // instruction and nuke it.
    auto *PN = cast<PHINode>(*I0->user_begin());
    PN->replaceAllUsesWith(I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
    I->replaceAllUsesWith(I0);
    I->eraseFromParent();
  }

  return true;
}
namespace {

// LockstepReverseIterator - Iterates through instructions
// in a set of blocks in reverse order from the first non-terminator.
// For example (assume all blocks have size n):
//   LockstepReverseIterator I([B1, B2, B3]);
//   *I-- = [B1[n], B2[n], B3[n]];
//   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
//   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
//   ...
class LockstepReverseIterator {
  ArrayRef<BasicBlock*> Blocks;
  SmallVector<Instruction*,4> Insts;
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
    reset();
  }

  void reset() {
    Fail = false;
    Insts.clear();
    for (auto *BB : Blocks) {
      Instruction *Inst = BB->getTerminator();
      for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
        Inst = Inst->getPrevNode();
      if (!Inst) {
        // Block wasn't big enough.
        Fail = true;
        return;
      }
      Insts.push_back(Inst);
    }
  }

  bool isValid() const {
    return !Fail;
  }

  void operator--() {
    if (Fail)
      return;
    for (auto *&Inst : Insts) {
      for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
        Inst = Inst->getPrevNode();
      // Already at beginning of block.
      if (!Inst) {
        Fail = true;
        return;
      }
    }
  }

  void operator++() {
    if (Fail)
      return;
    for (auto *&Inst : Insts) {
      for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
        Inst = Inst->getNextNode();
      // Already at end of block.
      if (!Inst) {
        Fail = true;
        return;
      }
    }
  }

  ArrayRef<Instruction*> operator*() const {
    return Insts;
  }
};

} // end anonymous namespace
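
// A minimal usage sketch for LockstepReverseIterator (illustrative only, not
// part of the pass; `B1` and `B2` are hypothetical blocks): walk two
// predecessors backwards in lockstep until their instruction rows diverge.
//
//   LockstepReverseIterator LRI({B1, B2});
//   while (LRI.isValid()) {
//     ArrayRef<Instruction *> Row = *LRI; // Row[0] in B1, Row[1] in B2.
//     if (!Row[0]->isSameOperationAs(Row[1]))
//       break; // The blocks' tails stop matching here.
//     --LRI;   // Step one instruction towards the blocks' beginnings.
//   }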
/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
                                           DomTreeUpdater *DTU) {
  // We support two situations:
  //   (1) all incoming arcs are unconditional
  //   (2) there are non-unconditional incoming arcs
  //
  // (2) is very common in switch defaults and
  // else-if patterns;
  //
  //   if (a) f(1);
  //   else if (b) f(2);
  //
  // produces:
  //
  //       [if]
  //      /    \
  //   [f(1)]  [if]
  //     |      |  \
  //     |      |   |
  //     |   [f(2)] |
  //      \     |  /
  //      [  end  ]
  //
  // [end] has two unconditional predecessor arcs and one conditional. The
  // conditional refers to the implicit empty 'else' arc. This conditional
  // arc can also be caused by an empty default block in a switch.
  //
  // In this case, we attempt to sink code from all *unconditional* arcs.
  // If we can sink instructions from these arcs (determined during the scan
  // phase below) we insert a common successor for all unconditional arcs and
  // connect that to [end], to enable sinking:
  //
  //       [if]
  //      /    \
  //   [x(1)]  [if]
  //     |      |  \
  //     |      |   \
  //     |   [x(2)]  |
  //      \    /     |
  //   [sink.split]  |
  //         \      /
  //         [ end ]
  //
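  // As a rough IR-level sketch of the pure-unconditional case (1) (assumed
  // names, for illustration only): two predecessors that both end in
  //
  //   pred1:                        pred2:
  //     store i32 %a, ptr %p          store i32 %b, ptr %p
  //     br label %end                 br label %end
  //
  // can have the stores sunk into %end, with the stored value PHI-merged:
  //
  //   end:
  //     %v = phi i32 [ %a, %pred1 ], [ %b, %pred2 ]
  //     store i32 %v, ptr %p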
  SmallVector<BasicBlock*,4> UnconditionalPreds;
  bool HaveNonUnconditionalPredecessors = false;
  for (auto *PredBB : predecessors(BB)) {
    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
    if (PredBr && PredBr->isUnconditional())
      UnconditionalPreds.push_back(PredBB);
    else
      HaveNonUnconditionalPredecessors = true;
  }
  if (UnconditionalPreds.size() < 2)
    return false;

  // We take a two-step approach to tail sinking. First we scan from the end of
  // each block upwards in lockstep. If the n'th instruction from the end of
  // each block can be sunk, those instructions are added to InstructionsToSink
  // and we carry on. If we can sink an instruction but need to PHI-merge some
  // operands (because they're not identical in each instruction) we add these
  // to PHIOperands.
  int ScanIdx = 0;
  SmallPtrSet<Value*,4> InstructionsToSink;
  DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
  LockstepReverseIterator LRI(UnconditionalPreds);
  while (LRI.isValid() &&
         canSinkInstructions(*LRI, PHIOperands)) {
    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
                      << "\n");
    InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
    ++ScanIdx;
    --LRI;
  }

  // If no instructions can be sunk, early-return.
  if (ScanIdx == 0)
    return false;

  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);

  if (!followedByDeoptOrUnreachable) {
    // Okay, we *could* sink the last ScanIdx instructions. But how many can we
    // actually sink before encountering an instruction that is unprofitable
    // to sink?
    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
      unsigned NumPHIdValues = 0;
      for (auto *I : *LRI)
        for (auto *V : PHIOperands[I]) {
          if (!InstructionsToSink.contains(V))
            ++NumPHIdValues;
          // FIXME: this check is overly optimistic. We may end up not sinking
          // said instruction, due to the very same profitability check.
          // See @creating_too_many_phis in sink-common-code.ll.
        }
      LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
      unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
      if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
        NumPHIInsts++;
      return NumPHIInsts <= 1;
    };
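
    // Illustrative arithmetic for the heuristic above (assumed numbers, not
    // from any particular test): with 3 unconditional predecessors and 4
    // operand values still needing PHI-merging, NumPHIInsts = ceil(4 / 3) = 2,
    // so the row of instructions is rejected; with 3 or fewer such values the
    // division rounds up to at most 1 PHI and the row is accepted.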
    // We've determined that we are going to sink the last ScanIdx
    // instructions, and recorded them in InstructionsToSink. Now, some
    // instructions may be unprofitable to sink. But that determination depends
    // on the other instructions that we are going to sink.

    // First, forward scan: find the first instruction unprofitable to sink,
    // recording all the ones that are profitable to sink.
    // FIXME: would it be better, after we detect that not all are profitable,
    // to either record the profitable ones, or erase the unprofitable ones?
    // Maybe we need to choose (at runtime) the one that will touch least
    // instrs?
    LRI.reset();
    int Idx = 0;
    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
    while (Idx < ScanIdx) {
      if (!ProfitableToSinkInstruction(LRI)) {
        // Too many PHIs would be created.
        LLVM_DEBUG(
            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
        break;
      }
      InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
      --LRI;
      ++Idx;
    }

    // If no instructions can be sunk, early-return.
    if (Idx == 0)
      return false;

    // Did we determine that (only) some instructions are unprofitable to sink?
    if (Idx < ScanIdx) {
      // Okay, some instructions are unprofitable.
      ScanIdx = Idx;
      InstructionsToSink = InstructionsProfitableToSink;

      // But, that may make other instructions unprofitable, too.
      // So, do a backward scan: do any earlier instructions become
      // unprofitable?
      assert(
          !ProfitableToSinkInstruction(LRI) &&
          "We already know that the last instruction is unprofitable to sink");
      ++LRI;
      --Idx;
      while (Idx >= 0) {
        // If we detect that an instruction becomes unprofitable to sink,
        // all earlier instructions won't be sunk either,
        // so preemptively keep InstructionsProfitableToSink in sync.
        // FIXME: is this the most performant approach?
        for (auto *I : *LRI)
          InstructionsProfitableToSink.erase(I);
        if (!ProfitableToSinkInstruction(LRI)) {
          // Everything starting with this instruction won't be sunk.
          ScanIdx = Idx;
          InstructionsToSink = InstructionsProfitableToSink;
        }
        ++LRI;
        --Idx;
      }
    }

    // If no instructions can be sunk, early-return.
    if (ScanIdx == 0)
      return false;
  }
  bool Changed = false;

  if (HaveNonUnconditionalPredecessors) {
    if (!followedByDeoptOrUnreachable) {
      // It is always legal to sink common instructions from unconditional
      // predecessors. However, if not all predecessors are unconditional,
      // this transformation might be pessimizing. So as a rule of thumb,
      // don't do it unless we'd sink at least one non-speculatable
      // instruction. See https://bugs.llvm.org/show_bug.cgi?id=30244
      LRI.reset();
      int Idx = 0;
      bool Profitable = false;
      while (Idx < ScanIdx) {
        if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
          Profitable = true;
          break;
        }
        --LRI;
        ++Idx;
      }
      if (!Profitable)
        return false;
    }

    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
    // We have a conditional edge and we're going to sink some instructions.
    // Insert a new block postdominating all blocks we're going to sink from.
    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
      // Edges couldn't be split.
      return false;
    Changed = true;
  }

  // Now that we've analyzed all potential sinking candidates, perform the
  // actual sink. We iteratively sink the last non-terminator of the source
  // blocks into their common successor unless doing so would require too
  // many PHI instructions to be generated (currently only one PHI is allowed
  // per sunk instruction).
  //
  // We can use InstructionsToSink to discount values needing PHI-merging that
  // will actually be sunk in a later iteration. This allows us to be more
  // aggressive in what we sink. This does allow a false positive where we
  // sink presuming a later value will also be sunk, but stop halfway through
  // and never actually sink it, which means we produce more PHIs than
  // intended. This is unlikely in practice though.
  int SinkIdx = 0;
  for (; SinkIdx != ScanIdx; ++SinkIdx) {
    LLVM_DEBUG(dbgs() << "SINK: Sink: "
                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                      << "\n");

    // Because we've sunk every instruction in turn, the current instruction to
    // sink is always at index 0.
    LRI.reset();

    if (!sinkLastInstruction(UnconditionalPreds)) {
      LLVM_DEBUG(
          dbgs()
          << "SINK: stopping here, failed to actually sink instruction!\n");
      break;
    }

    NumSinkCommonInstrs++;
    Changed = true;
  }
  if (SinkIdx != 0)
    ++NumSinkCommonCode;
  return Changed;
}
namespace {

struct CompatibleSets {
  using SetTy = SmallVector<InvokeInst *, 2>;

  SmallVector<SetTy, 1> Sets;

  static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);

  SetTy &getCompatibleSet(InvokeInst *II);

  void insert(InvokeInst *II);
};

CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
  // Perform a linear scan over all the existing sets, see if the new `invoke`
  // is compatible with any particular set. Since we know that all the
  // `invoke`s within a set are compatible, only check the first `invoke` in
  // each set. WARNING: at worst, this has quadratic complexity.
  for (CompatibleSets::SetTy &Set : Sets) {
    if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
      return Set;
  }

  // Otherwise, we either had no sets yet, or this invoke forms a new set.
  return Sets.emplace_back();
}

void CompatibleSets::insert(InvokeInst *II) {
  getCompatibleSet(II).emplace_back(II);
}

bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");

  // Can we theoretically merge these `invoke`s?
  auto IsIllegalToMerge = [](InvokeInst *II) {
    return II->cannotMerge() || II->isInlineAsm();
  };
  if (any_of(Invokes, IsIllegalToMerge))
    return false;

  // Either both `invoke`s must be direct,
  // or both `invoke`s must be indirect.
  auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
  bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
  bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
  if (HaveIndirectCalls) {
    if (!AllCallsAreIndirect)
      return false;
  } else {
    // All callees must be identical.
    Value *Callee = nullptr;
    for (InvokeInst *II : Invokes) {
      Value *CurrCallee = II->getCalledOperand();
      assert(CurrCallee && "There is always a called operand.");
      if (!Callee)
        Callee = CurrCallee;
      else if (Callee != CurrCallee)
        return false;
    }
  }

  // Either both `invoke`s must not have a normal destination,
  // or both `invoke`s must have a normal destination.
  auto HasNormalDest = [](InvokeInst *II) {
    return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
  };
  if (any_of(Invokes, HasNormalDest)) {
    // Do not merge an `invoke` that does not have a normal destination with
    // one that does have a normal destination, even though doing so would be
    // legal.
    if (!all_of(Invokes, HasNormalDest))
      return false;

    // All normal destinations must be identical.
    BasicBlock *NormalBB = nullptr;
    for (InvokeInst *II : Invokes) {
      BasicBlock *CurrNormalBB = II->getNormalDest();
      assert(CurrNormalBB && "There is always a 'continue to' basic block.");
      if (!NormalBB)
        NormalBB = CurrNormalBB;
      else if (NormalBB != CurrNormalBB)
        return false;
    }

    // In the normal destination, the incoming values for these two `invoke`s
    // must be compatible.
    SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
    if (!IncomingValuesAreCompatible(
            NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
            &EquivalenceSet))
      return false;
  }

#ifndef NDEBUG
  // All unwind destinations must be identical.
  // We know that because we have started from said unwind destination.
  BasicBlock *UnwindBB = nullptr;
  for (InvokeInst *II : Invokes) {
    BasicBlock *CurrUnwindBB = II->getUnwindDest();
    assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
    if (!UnwindBB)
      UnwindBB = CurrUnwindBB;
    else
      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
  }
#endif

  // In the unwind destination, the incoming values for these two `invoke`s
  // must be compatible.
  if (!IncomingValuesAreCompatible(
          Invokes.front()->getUnwindDest(),
          {Invokes[0]->getParent(), Invokes[1]->getParent()}))
    return false;

  // Ignoring arguments, these `invoke`s must be identical,
  // including operand bundles.
  const InvokeInst *II0 = Invokes.front();
  for (auto *II : Invokes.drop_front())
    if (!II->isSameOperationAs(II0))
      return false;

  // Can we theoretically form the data operands for the merged `invoke`?
  auto IsIllegalToMergeArguments = [](auto Ops) {
    Type *Ty = std::get<0>(Ops)->getType();
    assert(Ty == std::get<1>(Ops)->getType() && "Incompatible types?");
    return Ty->isTokenTy() && std::get<0>(Ops) != std::get<1>(Ops);
  };
  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
  if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
             IsIllegalToMergeArguments))
    return false;

  return true;
}

} // namespace
// Merge all invokes in the provided set, all of which are compatible
// as per the `CompatibleSets::shouldBelongToSameSet()`.
static void MergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
                                       DomTreeUpdater *DTU) {
  assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");

  SmallVector<DominatorTree::UpdateType, 8> Updates;
  if (DTU)
    Updates.reserve(2 + 3 * Invokes.size());

  bool HasNormalDest =
      !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());

  // Clone one of the invokes into a new basic block.
  // Since they are all compatible, it doesn't matter which invoke is cloned.
  InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
    InvokeInst *II0 = Invokes.front();
    BasicBlock *II0BB = II0->getParent();
    BasicBlock *InsertBeforeBlock =
        II0->getParent()->getIterator()->getNextNode();
    Function *Func = II0BB->getParent();
    LLVMContext &Ctx = II0->getContext();

    BasicBlock *MergedInvokeBB = BasicBlock::Create(
        Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);

    auto *MergedInvoke = cast<InvokeInst>(II0->clone());
    // NOTE: all invokes have the same attributes, so no handling needed.
    MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());

    if (!HasNormalDest) {
      // This set does not have a normal destination,
      // so just form a new block with an unreachable terminator.
      BasicBlock *MergedNormalDest = BasicBlock::Create(
          Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
      new UnreachableInst(Ctx, MergedNormalDest);
      MergedInvoke->setNormalDest(MergedNormalDest);
    }

    // The unwind destination, however, remains identical for all invokes here.

    return MergedInvoke;
  }();

  if (DTU) {
    // Predecessor blocks that contained these invokes will now branch to
    // the new block that contains the merged invoke, ...
    for (InvokeInst *II : Invokes)
      Updates.push_back(
          {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});

    // ... which has the new `unreachable` block as normal destination,
    // or unwinds to the (same for all `invoke`s in this set) `landingpad`.
    for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
      Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
                         SuccBBOfMergedInvoke});

    // Since predecessor blocks now unconditionally branch to a new block,
    // they no longer branch to their original successors.
    for (InvokeInst *II : Invokes)
      for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
        Updates.push_back(
            {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
  }

  bool IsIndirectCall = Invokes[0]->isIndirectCall();

  // Form the merged operands for the merged invoke.
  for (Use &U : MergedInvoke->operands()) {
    // Only PHI together the indirect callees and data operands.
    if (MergedInvoke->isCallee(&U)) {
      if (!IsIndirectCall)
        continue;
    } else if (!MergedInvoke->isDataOperand(&U))
      continue;

    // Don't create trivial PHI's with all-identical incoming values.
    bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
      return II->getOperand(U.getOperandNo()) != U.get();
    });
    if (!NeedPHI)
      continue;

    // Form a PHI out of all the data ops under this index.
    PHINode *PN = PHINode::Create(
        U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke);
    for (InvokeInst *II : Invokes)
      PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());

    U.set(PN);
  }

  // We've ensured that each PHI node has compatible (identical) incoming
  // values when coming from each of the `invoke`s in the current merge set,
  // so update the PHI nodes accordingly.
  for (BasicBlock *Succ : successors(MergedInvoke))
    AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
                          /*ExistPred=*/Invokes.front()->getParent());

  // And finally, replace the original `invoke`s with an unconditional branch
  // to the block with the merged `invoke`. Also, give that merged `invoke`
  // the merged debugloc of all the original `invoke`s.
  const DILocation *MergedDebugLoc = nullptr;
  for (InvokeInst *II : Invokes) {
    // Compute the debug location common to all the original `invoke`s.
    if (!MergedDebugLoc)
      MergedDebugLoc = II->getDebugLoc();
    else
      MergedDebugLoc =
          DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());

    // And replace the old `invoke` with an unconditional branch
    // to the block with the merged `invoke`.
    for (BasicBlock *OrigSuccBB : successors(II->getParent()))
      OrigSuccBB->removePredecessor(II->getParent());
    BranchInst::Create(MergedInvoke->getParent(), II->getParent());
    II->replaceAllUsesWith(MergedInvoke);
    II->eraseFromParent();
    ++NumInvokesMerged;
  }
  MergedInvoke->setDebugLoc(MergedDebugLoc);
  ++NumInvokeSetsFormed;

  if (DTU)
    DTU->applyUpdates(Updates);
}
/// If this block is a `landingpad` exception handling block, categorize all
/// the predecessor `invoke`s into sets, with all `invoke`s in each set
/// being "mergeable" together, and then merge invokes in each set together.
///
/// This is a weird mix of hoisting and sinking. Visually, it goes from:
///          [...]        [...]
///            |            |
///        [invoke0]    [invoke1]
///           / \          / \
///     [cont0] [landingpad] [cont1]
/// to:
///      [...] [...]
///         \ /
///      [invoke]
///         / \
///   [cont] [landingpad]
///
/// But of course we can only do that if the invokes share the `landingpad`,
/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
/// and the invoked functions are "compatible".
static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {
  if (!EnableMergeCompatibleInvokes)
    return false;

  bool Changed = false;

  // FIXME: generalize to all exception handling blocks?
  if (!BB->isLandingPad())
    return Changed;

  CompatibleSets Grouper;

  // Record all the predecessors of this `landingpad`. As per verifier,
  // the only allowed predecessor is the unwind edge of an `invoke`.
  // We want to group "compatible" `invoke`s into the same set to be merged.
  for (BasicBlock *PredBB : predecessors(BB))
    Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));

  // And now, merge `invoke`s that were grouped together.
  for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
    if (Invokes.size() < 2)
      continue;
    Changed = true;
    MergeCompatibleInvokesImpl(Invokes, DTU);
  }

  return Changed;
}
namespace {
/// Track ephemeral values, which should be ignored for cost-modelling
/// purposes. Requires walking instructions in reverse order.
class EphemeralValueTracker {
  SmallPtrSet<const Instruction *, 32> EphValues;

  bool isEphemeral(const Instruction *I) {
    if (isa<AssumeInst>(I))
      return true;
    return !I->mayHaveSideEffects() && !I->isTerminator() &&
           all_of(I->users(), [&](const User *U) {
             return EphValues.count(cast<Instruction>(U));
           });
  }

public:
  bool track(const Instruction *I) {
    if (isEphemeral(I)) {
      EphValues.insert(I);
      return true;
    }
    return false;
  }

  bool contains(const Instruction *I) const { return EphValues.contains(I); }
};
} // namespace
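
// A minimal sketch of how the tracker is meant to be driven (illustrative
// only; `BB` is a hypothetical block): because ephemerality is defined by a
// value's users, the walk must visit users before their operands, i.e. run
// bottom-up over the block.
//
//   EphemeralValueTracker Tracker;
//   for (const Instruction &I : reverse(*BB))
//     if (Tracker.track(&I))
//       continue; // Feeds only assumes; ignore it for cost modelling.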
/// Determine if we can hoist or sink a sole store instruction out of a
/// conditional block.
///
/// We are looking for code like the following:
///   BrBB:
///     store i32 %add, i32* %arrayidx2
///     ... // No other stores or function calls (we could be calling a memory
///     ... // function).
///     %cmp = icmp ult %x, %y
///     br i1 %cmp, label %EndBB, label %ThenBB
///   ThenBB:
///     store i32 %add5, i32* %arrayidx2
///     br label EndBB
///   EndBB:
///     ...
///   We are going to transform this into:
///   BrBB:
///     store i32 %add, i32* %arrayidx2
///     ... //
///     %cmp = icmp ult %x, %y
///     %add.add5 = select i1 %cmp, i32 %add, i32 %add5
///     store i32 %add.add5, i32* %arrayidx2
///     ...
///
/// \return The pointer to the value of the previous store if the store can be
///         hoisted into the predecessor block. nullptr otherwise.
static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
  if (!StoreToHoist)
    return nullptr;

  // Volatile or atomic.
  if (!StoreToHoist->isSimple())
    return nullptr;

  Value *StorePtr = StoreToHoist->getPointerOperand();
  Type *StoreTy = StoreToHoist->getValueOperand()->getType();

  // Look for a store to the same pointer in BrBB.
  unsigned MaxNumInstToLookAt = 9;
  // Skip pseudo probe intrinsic calls which are not really killing any memory
  // accesses.
  for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
    if (!MaxNumInstToLookAt)
      break;
    --MaxNumInstToLookAt;

    // Could be calling an instruction that affects memory like free().
    if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
      return nullptr;

    if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
      // Found the previous store to the same location and type. Make sure it
      // is simple, to avoid introducing a spurious non-atomic write after an
      // atomic write.
      if (SI->getPointerOperand() == StorePtr &&
          SI->getValueOperand()->getType() == StoreTy && SI->isSimple())
        // Found the previous store, return its value operand.
        return SI->getValueOperand();
      return nullptr; // Unknown store.
    }

    if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
          LI->isSimple()) {
        // Local objects (created by an `alloca` instruction) are always
        // writable, so once we are past a read from a location it is valid to
        // also write to that same location.
        // If the address of the local object never escapes the function, that
        // means it's never concurrently read or written, hence moving the
        // store from under the condition will not introduce a data race.
        auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
        if (AI && !PointerMayBeCaptured(AI, false, true))
          // Found a previous load, return it.
          return LI;
      }
      // The load didn't work out, but we may still find a store.
    }
  }

  return nullptr;
}
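
// A second shape accepted above (illustrative only, with assumed names): a
// read from a non-escaping local followed by a conditional store to it. Here
// the previous access is a load rather than a store, and the returned value
// is that load:
//
//   %buf = alloca i32
//   %old = load i32, ptr %buf      ; last access to %buf in BrBB
//   br i1 %c, label %then, label %end
//   then:
//     store i32 %new, ptr %buf     ; may be executed unconditionally, storing
//                                  ; `select i1 %c, i32 %new, i32 %old`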
/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
/// converted to selects.
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
                                           BasicBlock *EndBB,
                                           unsigned &SpeculatedInstructions,
                                           InstructionCost &Cost,
                                           const TargetTransformInfo &TTI) {
  TargetTransformInfo::TargetCostKind CostKind =
      BB->getParent()->hasMinSize()
          ? TargetTransformInfo::TCK_CodeSize
          : TargetTransformInfo::TCK_SizeAndLatency;

  bool HaveRewritablePHIs = false;
  for (PHINode &PN : EndBB->phis()) {
    Value *OrigV = PN.getIncomingValueForBlock(BB);
    Value *ThenV = PN.getIncomingValueForBlock(ThenBB);

    // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
    // Skip PHIs which are trivial.
    if (ThenV == OrigV)
      continue;

    Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
                                   CmpInst::BAD_ICMP_PREDICATE, CostKind);

    // Don't convert to selects if we could remove undefined behavior instead.
    if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
        passingValueIsAlwaysUndefined(ThenV, &PN))
      return false;

    HaveRewritablePHIs = true;
    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
    if (!OrigCE && !ThenCE)
      continue; // Known cheap (FIXME: Maybe not true for aggregates).

    InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
    InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
    InstructionCost MaxCost =
        2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
    if (OrigCost + ThenCost > MaxCost)
      return false;

    // Account for the cost of an unfolded ConstantExpr which could end up
    // getting expanded into Instructions.
    // FIXME: This doesn't account for how many operations are combined in the
    // constant expression.
    ++SpeculatedInstructions;
    if (SpeculatedInstructions > 1)
      return false;
  }

  return HaveRewritablePHIs;
}
/// Speculate a conditional basic block, flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
/// instructions like this is most often not desirable. Instead, there is an MI
/// pass which can do it with full awareness of the resource constraints.
/// However, some cases are "obvious" and we should do them directly. An
/// example of this is speculating a single, reasonably cheap instruction.
///
/// There is only one distinct advantage to flattening the CFG at the IR level:
/// it makes very common but simplistic optimizations such as are common in
/// instcombine and the DAG combiner more powerful by removing CFG edges and
/// modeling their effects with easier to reason about SSA value graphs.
///
/// An illustration of this transform is turning this IR:
/// \code
///   BB:
///     %cmp = icmp ult %x, %y
///     br i1 %cmp, label %EndBB, label %ThenBB
///   ThenBB:
///     %sub = sub %x, %y
///     br label %EndBB
///   EndBB:
///     %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
///     ...
/// \endcode
///
/// Into this IR:
/// \code
///   BB:
///     %cmp = icmp ult %x, %y
///     %sub = sub %x, %y
///     %cond = select i1 %cmp, 0, %sub
///     ...
/// \endcode
///
/// \returns true if the conditional block is removed.
bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
                                            const TargetTransformInfo &TTI) {
  // Be conservative for now. FP select instruction can often be expensive.
  Value *BrCond = BI->getCondition();
  if (isa<FCmpInst>(BrCond))
    return false;

  BasicBlock *BB = BI->getParent();
  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
  InstructionCost Budget =
      PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;

  // If ThenBB is actually on the false edge of the conditional branch,
  // remember to swap the select operands later.
  bool Invert = false;
  if (ThenBB != BI->getSuccessor(0)) {
    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
    Invert = true;
  }
  assert(EndBB == BI->getSuccessor(!Invert) &&
         "No edge from 'if' block to end block");

  // If the branch is non-unpredictable, and is predicted to *not* branch to
  // the `then` block, then avoid speculating it.
  if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
    uint64_t TWeight, FWeight;
    if (extractBranchWeights(*BI, TWeight, FWeight) &&
        (TWeight + FWeight) != 0) {
      uint64_t EndWeight = Invert ? TWeight : FWeight;
      BranchProbability BIEndProb =
          BranchProbability::getBranchProbability(EndWeight,
                                                  TWeight + FWeight);
      BranchProbability Likely = TTI.getPredictableBranchThreshold();
      if (BIEndProb >= Likely)
        return false;
    }
  }

  // Keep a count of how many times instructions are used within ThenBB when
  // they are candidates for sinking into ThenBB. Specifically:
  //  - They are defined in BB, and
  //  - They have no side effects, and
  //  - All of their uses are in ThenBB.
  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;

  SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;

  unsigned SpeculatedInstructions = 0;
  Value *SpeculatedStoreValue = nullptr;
  StoreInst *SpeculatedStore = nullptr;
  EphemeralValueTracker EphTracker;
  for (Instruction &I : reverse(drop_end(*ThenBB))) {
    // Skip debug info.
    if (isa<DbgInfoIntrinsic>(I)) {
      SpeculatedDbgIntrinsics.push_back(&I);
      continue;
    }

    // Skip pseudo probes. The consequence is we lose track of the branch
    // probability for ThenBB, which is fine since the optimization here takes
    // place regardless of the branch probability.
    if (isa<PseudoProbeInst>(I)) {
      // The probe should be deleted so that it will not be over-counted when
      // the samples collected on the non-conditional path are counted towards
      // the conditional path. We leave it for the counts inference algorithm
      // to figure out a proper count for an unknown probe.
      SpeculatedDbgIntrinsics.push_back(&I);
      continue;
    }

    // Ignore ephemeral values, they will be dropped by the transform.
    if (EphTracker.track(&I))
      continue;

    // Only speculatively execute a single instruction (not counting the
    // terminator) for now.
    ++SpeculatedInstructions;
    if (SpeculatedInstructions > 1)
      return false;

    // Don't hoist the instruction if it's unsafe or expensive.
    if (!isSafeToSpeculativelyExecute(&I) &&
        !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
                                  &I, BB, ThenBB, EndBB))))
      return false;
    if (!SpeculatedStoreValue &&
        computeSpeculationCost(&I, TTI) >
            PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
      return false;

    // Store the store speculation candidate.
    if (SpeculatedStoreValue)
      SpeculatedStore = cast<StoreInst>(&I);

    // Do not hoist the instruction if any of its operands are defined but not
    // used in BB. The transformation will prevent the operand from
    // being sunk into the use block.
    for (Use &Op : I.operands()) {
      Instruction *OpI = dyn_cast<Instruction>(Op);
      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
        continue; // Not a candidate for sinking.

      ++SinkCandidateUseCounts[OpI];
    }
  }
  // Consider any sink candidates which are only used in ThenBB as costs for
  // speculation. Note, while we iterate over a DenseMap here, we are summing
  // and so iteration order isn't significant.
  for (const auto &[Inst, Count] : SinkCandidateUseCounts)
    if (Inst->hasNUses(Count)) {
      ++SpeculatedInstructions;
      if (SpeculatedInstructions > 1)
        return false;
    }

  // Check that we can insert the selects and that it's not too expensive to do
  // so.
  bool Convert = SpeculatedStore != nullptr;
  InstructionCost Cost = 0;
  Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
                                            SpeculatedInstructions,
                                            Cost, TTI);
  if (!Convert || Cost > Budget)
    return false;

  // If we get here, we can hoist the instruction and if-convert.
  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);

  // Insert a select of the value of the speculated store.
  if (SpeculatedStoreValue) {
    IRBuilder<NoFolder> Builder(BI);
    Value *OrigV = SpeculatedStore->getValueOperand();
    Value *TrueV = SpeculatedStore->getValueOperand();
    Value *FalseV = SpeculatedStoreValue;
    if (Invert)
      std::swap(TrueV, FalseV);
    Value *S = Builder.CreateSelect(
        BrCond, TrueV, FalseV, "spec.store.select", BI);
    SpeculatedStore->setOperand(0, S);
    SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
                                         SpeculatedStore->getDebugLoc());

    // The value stored is still conditional, but the store itself is now
    // unconditionally executed, so we must be sure that any linked dbg.assign
    // intrinsics are tracking the new stored value (the result of the
    // select). If we don't, and the store were to be removed by another pass
    // (e.g. DSE), then we'd eventually end up emitting a location describing
    // the conditional value, unconditionally.
    //
    // === Before this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1, ...
    //   br %cond if.then
    //
    // if.then:
    //   store %two, %x.dest, !DIAssignID !2
    //   dbg.assign %two, "x", ..., !2, ...
    //
    // === After this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1
    //   ...
    //   %merge = select %cond, %two, %one
    //   store %merge, %x.dest, !DIAssignID !2
    //   dbg.assign %merge, "x", ..., !2
    for (auto *DAI : at::getAssignmentMarkers(SpeculatedStore)) {
      if (any_of(DAI->location_ops(), [&](Value *V) { return V == OrigV; }))
        DAI->replaceVariableLocationOp(OrigV, S);
    }
  }

  // Metadata can be dependent on the condition we are hoisting above.
  // Conservatively strip all metadata on the instruction. Drop the debug loc
  // to avoid making it appear as if the condition is a constant, which would
  // be misleading while debugging.
  // Similarly strip attributes that may be dependent on the condition we are
  // hoisting above.
  for (auto &I : make_early_inc_range(*ThenBB)) {
    if (!SpeculatedStoreValue || &I != SpeculatedStore) {
      // Don't update the DILocation of dbg.assign intrinsics.
      if (!isa<DbgAssignIntrinsic>(&I))
        I.setDebugLoc(DebugLoc());
    }
    I.dropUndefImplyingAttrsAndUnknownMetadata();

    // Drop ephemeral values.
    if (EphTracker.contains(&I)) {
      I.replaceAllUsesWith(PoisonValue::get(I.getType()));
      I.eraseFromParent();
    }
  }

  // Hoist the instructions.
  BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
             std::prev(ThenBB->end()));

  // Insert selects and rewrite the PHI operands.
  IRBuilder<NoFolder> Builder(BI);
  for (PHINode &PN : EndBB->phis()) {
    unsigned OrigI = PN.getBasicBlockIndex(BB);
    unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
    Value *OrigV = PN.getIncomingValue(OrigI);
    Value *ThenV = PN.getIncomingValue(ThenI);

    // Skip PHIs which are trivial.
    if (OrigV == ThenV)
      continue;

    // Create a select whose true value is the speculatively executed value and
    // false value is the pre-existing value. Swap them if the branch
    // destinations were inverted.
    Value *TrueV = ThenV, *FalseV = OrigV;
    if (Invert)
      std::swap(TrueV, FalseV);
    Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
    PN.setIncomingValue(OrigI, V);
    PN.setIncomingValue(ThenI, V);
  }

  // Remove speculated dbg intrinsics.
  // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
  // dbg value for the different flows and inserting it after the select.
  for (Instruction *I : SpeculatedDbgIntrinsics) {
    // We still want to know that an assignment took place so don't remove
    // dbg.assign intrinsics.
    if (!isa<DbgAssignIntrinsic>(I))
      I->eraseFromParent();
  }

  ++NumSpeculations;
  return true;
}
/// Return true if we can thread a branch across this block.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
  int Size = 0;
  EphemeralValueTracker EphTracker;

  // Walk the block in reverse so that we can identify ephemeral values
  // properly (values only feeding assumes).
  for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
    // Can't fold blocks that contain noduplicate or convergent calls.
    if (CallInst *CI = dyn_cast<CallInst>(&I))
      if (CI->cannotDuplicate() || CI->isConvergent())
        return false;

    // Ignore ephemeral values which are deleted during codegen.
    // We will delete Phis while threading, so Phis should not be accounted in
    // block's size.
    if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
      if (Size++ > MaxSmallBlockSize)
        return false; // Don't clone large BB's.
    }

    // We can only support instructions that do not define values that are
    // live outside of the current basic block.
    for (User *U : I.users()) {
      Instruction *UI = cast<Instruction>(U);
      if (UI->getParent() != BB || isa<PHINode>(UI))
        return false;
    }

    // Looks ok, continue checking.
  }

  return true;
}
static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
                                        BasicBlock *To) {
  // Don't look past the block defining the value, we might get the value from
  // a previous loop iteration.
  auto *I = dyn_cast<Instruction>(V);
  if (I && I->getParent() == To)
    return nullptr;

  // We know the value if the From block branches on it.
  auto *BI = dyn_cast<BranchInst>(From->getTerminator());
  if (BI && BI->isConditional() && BI->getCondition() == V &&
      BI->getSuccessor(0) != BI->getSuccessor(1))
    return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
                                     : ConstantInt::getFalse(BI->getContext());

  return nullptr;
}
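
// Illustrative IR for getKnownValueOnEdge (assumed block and value names):
// given
//
//   from:
//     br i1 %c, label %to, label %other
//
// the value %c is known to be `true` on the from->to edge and `false` on the
// from->other edge, provided the two successors are distinct blocks.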
/// If we have a conditional branch on something for which we know the constant
/// value in predecessors (e.g. a phi node in the current block), thread edges
/// from the predecessor to their ultimate destination.
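///
/// A minimal sketch of the pattern (assumed names, for illustration):
/// \code
///   bb:
///     %cond = phi i1 [ true, %pred1 ], [ %x, %pred2 ]
///     br i1 %cond, label %t, label %f
/// \endcode
/// The edge pred1->bb always continues to %t, so pred1 can be rethreaded to
/// branch there directly, cloning any of bb's instructions it needs.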
static std::optional<bool>
FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
                                            const DataLayout &DL,
                                            AssumptionCache *AC) {
  SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
  BasicBlock *BB = BI->getParent();
  Value *Cond = BI->getCondition();
  PHINode *PN = dyn_cast<PHINode>(Cond);
  if (PN && PN->getParent() == BB) {
    // Degenerate case of a single entry PHI.
    if (PN->getNumIncomingValues() == 1) {
      FoldSingleEntryPHINodes(PN->getParent());
      return true;
    }

    for (Use &U : PN->incoming_values())
      if (auto *CB = dyn_cast<ConstantInt>(U))
        KnownValues[CB].insert(PN->getIncomingBlock(U));
  } else {
    for (BasicBlock *Pred : predecessors(BB)) {
      if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
        KnownValues[CB].insert(Pred);
    }
  }

  if (KnownValues.empty())
    return false;

  // Now we know that this block has multiple preds and two succs.
  // Check that the block is small enough and values defined in the block are
  // not used outside of it.
  if (!BlockIsSimpleEnoughToThreadThrough(BB))
    return false;

  for (const auto &Pair : KnownValues) {
    // Okay, we now know that all edges from PredBB should be revectored to
    // branch to RealDest.
    ConstantInt *CB = Pair.first;
    ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
    BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());

    if (RealDest == BB)
      continue; // Skip self loops.

    // Skip if the predecessor's terminator is an indirect branch.
    if (any_of(PredBBs, [](BasicBlock *PredBB) {
          return isa<IndirectBrInst>(PredBB->getTerminator());
        }))
      continue;

    LLVM_DEBUG({
      dbgs() << "Condition " << *Cond << " in " << BB->getName()
             << " has value " << *Pair.first << " in predecessors:\n";
      for (const BasicBlock *PredBB : Pair.second)
        dbgs() << "  " << PredBB->getName() << "\n";
      dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
    });

    // Split the predecessors we are threading into a new edge block. We'll
    // clone the instructions into this block, and then redirect it to
    // RealDest.
    BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);

    // TODO: These just exist to reduce test diff, we can drop them if we like.
    EdgeBB->setName(RealDest->getName() + ".critedge");
    EdgeBB->moveBefore(RealDest);

    // Update PHI nodes.
    AddPredecessorToBlock(RealDest, EdgeBB, BB);

    // BB may have instructions that are being threaded over. Clone these
    // instructions into EdgeBB. We know that there will be no uses of the
    // cloned instructions outside of EdgeBB.
    BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
    DenseMap<Value *, Value *> TranslateMap; // Track translated values.
    TranslateMap[Cond] = CB;
    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
      if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
        TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
        continue;
      }
      // Clone the instruction.
      Instruction *N = BBI->clone();
      if (BBI->hasName())
        N->setName(BBI->getName() + ".c");

      // Update operands due to translation.
      for (Use &Op : N->operands()) {
        DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
        if (PI != TranslateMap.end())
          Op = PI->second;
      }

      // Check for trivial simplification.
      if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = V;
        if (!N->mayHaveSideEffects()) {
          N->deleteValue(); // Instruction folded away, don't need actual inst
          N = nullptr;
        }
      } else {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = N;
      }
      if (N) {
        // Insert the new instruction into its new home.
        N->insertInto(EdgeBB, InsertPt);

        // Register the new instruction with the assumption cache if necessary.
        if (auto *Assume = dyn_cast<AssumeInst>(N))
          if (AC)
            AC->registerAssumption(Assume);
      }
    }

    BB->removePredecessor(EdgeBB);
    BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
    EdgeBI->setSuccessor(0, RealDest);
    EdgeBI->setDebugLoc(BI->getDebugLoc());

    if (DTU) {
      SmallVector<DominatorTree::UpdateType, 2> Updates;
      Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
      Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
      DTU->applyUpdates(Updates);
    }

    // For simplicity, we created a separate basic block for the edge. Merge
    // it back into the predecessor if possible. This not only avoids
    // unnecessary SimplifyCFG iterations, but also makes sure that we don't
    // bypass the check for trivial cycles above.
    MergeBlockIntoPredecessor(EdgeBB, DTU);

    // Signal repeat, simplifying any other constants.
    return std::nullopt;
  }

  return false;
}
static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
                                                    DomTreeUpdater *DTU,
                                                    const DataLayout &DL,
                                                    AssumptionCache *AC) {
  std::optional<bool> Result;
  bool EverChanged = false;
  do {
    // Note that std::nullopt means "we changed things, but recurse further."
    Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
    EverChanged |= Result == std::nullopt || *Result;
  } while (Result == std::nullopt);
  return EverChanged;
}
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
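///
/// A rough sketch of the shape being matched (assumed names, for
/// illustration):
/// \code
///   dom:
///     br i1 %c, label %if.true, label %if.false
///   if.true:                ; empty or cheap speculatable code
///     br label %bb
///   if.false:               ; empty or cheap speculatable code
///     br label %bb
///   bb:
///     %phi = phi i32 [ %a, %if.true ], [ %b, %if.false ]
/// \endcode
/// which, when profitable, becomes `%phi = select i1 %c, i32 %a, i32 %b` in
/// the dominating block, flattening the diamond.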
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
                                DomTreeUpdater *DTU, const DataLayout &DL) {
  // Ok, this is a two entry PHI node. Check to see if this is a simple "if
  // statement", which has a very simple dominance structure. Basically, we
  // are trying to find the condition that is being branched on, which
  // subsequently causes this merge to happen. We really want control
  // dependence information for this check, but simplifycfg can't keep it up
  // to date, and this catches most of the cases we care about anyway.
  BasicBlock *BB = PN->getParent();

  BasicBlock *IfTrue, *IfFalse;
  BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
  if (!DomBI)
    return false;
  Value *IfCond = DomBI->getCondition();
  // Don't bother if the branch will be constant folded trivially.
  if (isa<ConstantInt>(IfCond))
    return false;

  BasicBlock *DomBlock = DomBI->getParent();
  SmallVector<BasicBlock *, 2> IfBlocks;
  llvm::copy_if(
      PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
        return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
      });
  assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
         "Will have either one or two blocks to speculate.");

  // If the branch is non-unpredictable, see if we either predictably jump to
  // the merge bb (if we have only a single 'then' block), or if we predictably
  // jump to one specific 'then' block (if we have two of them).
  // It isn't beneficial to speculatively execute the code
  // from the block that we know is predictably not entered.
  if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
    uint64_t TWeight, FWeight;
    if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
        (TWeight + FWeight) != 0) {
      BranchProbability BITrueProb =
          BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
      BranchProbability Likely = TTI.getPredictableBranchThreshold();
      BranchProbability BIFalseProb = BITrueProb.getCompl();
      if (IfBlocks.size() == 1) {
        BranchProbability BIBBProb =
            DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
        if (BIBBProb >= Likely)
          return false;
      } else {
        if (BITrueProb >= Likely || BIFalseProb >= Likely)
          return false;
      }
    }
  }

  // Don't try to fold an unreachable block. For example, the phi node itself
  // can't be the candidate if-condition for a select that we want to form.
  if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
    if (IfCondPhiInst->getParent() == BB)
      return false;

  // Okay, we found that we can merge this two-entry phi node into a select.
  // Doing so would require us to fold *all* two entry phi nodes in this block.
  // At some point this becomes non-profitable (particularly if the target
  // doesn't support cmov's). Only do this transformation if there are two or
  // fewer PHI nodes in this block.
  unsigned NumPhis = 0;
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
    if (NumPhis > 2)
      return false;
  // Loop over the PHIs, seeing if we can promote them all to select
  // instructions. While we are at it, keep track of the instructions
  // that need to be moved to the dominating block.
  SmallPtrSet<Instruction *, 4> AggressiveInsts;
  InstructionCost Cost = 0;
  InstructionCost Budget =
      TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;

  bool Changed = false;
  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
    PHINode *PN = cast<PHINode>(II++);
    if (Value *V = simplifyInstruction(PN, {DL, PN})) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
      Changed = true;
      continue;
    }

    if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
                             Cost, Budget, TTI) ||
        !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
                             Cost, Budget, TTI))
      return Changed;
  }

  // If we folded the first phi, PN dangles at this point. Refresh it. If
  // we ran out of PHIs then we simplified them all.
  PN = dyn_cast<PHINode>(BB->begin());
  if (!PN)
    return true;

  // Return true if at least one of these is a 'not', and another is either
  // a 'not' too, or a constant.
  auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
    if (!match(V0, m_Not(m_Value())))
      std::swap(V0, V1);
    auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
    return match(V0, m_Not(m_Value())) && match(V1, Invertible);
  };

  // Don't fold i1 branches on PHIs which contain binary operators or
  // (possibly inverted) select form of or/ands, unless one of
  // the incoming values is a 'not' and the other one is freely invertible.
  // These can often be turned into switches and other things.
  auto IsBinOpOrAnd = [](Value *V) {
    return match(
        V, m_CombineOr(
               m_BinOp(),
               m_CombineOr(m_Select(m_Value(), m_ImmConstant(), m_Value()),
                           m_Select(m_Value(), m_Value(), m_ImmConstant()))));
  };
  if (PN->getType()->isIntegerTy(1) &&
      (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
       IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
      !CanHoistNotFromBothValues(PN->getIncomingValue(0),
                                 PN->getIncomingValue(1)))
    return Changed;

  // If all PHI nodes are promotable, check to make sure that all instructions
  // in the predecessor blocks can be promoted as well. If not, we won't be
  // able to get rid of the control flow, so it's not worth promoting to select
  // instructions.
  for (BasicBlock *IfBlock : IfBlocks)
    for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
      if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
        // This is not an aggressive instruction that we can promote.
        // Because of this, we won't be able to get rid of the control flow, so
        // the xform is not worth it.
        return Changed;
      }

  // If either of the blocks has its address taken, we can't do this fold.
  if (any_of(IfBlocks,
             [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
    return Changed;

  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond
                    << "  T: " << IfTrue->getName()
                    << "  F: " << IfFalse->getName() << "\n");

  // If we can still promote the PHI nodes after this gauntlet of tests,
  // do all of the PHIs now.

  // Move all 'aggressive' instructions, which are defined in the
  // conditional parts of the if's up to the dominating block.
  for (BasicBlock *IfBlock : IfBlocks)
    hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);

  IRBuilder<NoFolder> Builder(DomBI);
  // Propagate fast-math-flags from phi nodes to replacement selects.
  IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
  while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
    if (isa<FPMathOperator>(PN))
      Builder.setFastMathFlags(PN->getFastMathFlags());

    // Change the PHI node into a select instruction.
    Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
    Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);

    Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
    PN->replaceAllUsesWith(Sel);
    Sel->takeName(PN);
    PN->eraseFromParent();
  }

  // At this point, all IfBlocks are empty, so our if statement
  // has been flattened. Change DomBlock to jump directly to our new block to
  // avoid other simplifycfg's kicking in on the diamond.
  Builder.CreateBr(BB);

  SmallVector<DominatorTree::UpdateType, 3> Updates;
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, DomBlock, BB});
    for (auto *Successor : successors(DomBlock))
      Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
  }

  DomBI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}

static Value *createLogicalOp(IRBuilderBase &Builder,
                              Instruction::BinaryOps Opc, Value *LHS,
                              Value *RHS, const Twine &Name = "") {
  // Try to relax logical op to binary op.
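  // (A "logical" op is the select form that does not propagate poison from
  // its second operand: e.g. 'select i1 %L, i1 true, i1 %R' is a logical
  // 'or'. Illustrative rationale: if %R being poison implies %L is poison,
  // then 'or i1 %L, %R' is no more poisonous than the select form, so the
  // plain binary op is safe to use.)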
  if (impliesPoison(RHS, LHS))
    return Builder.CreateBinOp(Opc, LHS, RHS, Name);
  if (Opc == Instruction::And)
    return Builder.CreateLogicalAnd(LHS, RHS, Name);
  if (Opc == Instruction::Or)
    return Builder.CreateLogicalOr(LHS, RHS, Name);
  llvm_unreachable("Invalid logical opcode");
}

/// Return true if either PBI or BI has branch weights available, and store
/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
/// not have branch weights, use 1:1 as its weights.
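/// For example (illustrative): if PBI carries weights 3:1 and BI carries
/// none, this returns true with Pred{True,False}Weight = {3, 1} and
/// Succ{True,False}Weight = {1, 1}.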
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
                                   uint64_t &PredTrueWeight,
                                   uint64_t &PredFalseWeight,
                                   uint64_t &SuccTrueWeight,
                                   uint64_t &SuccFalseWeight) {
  bool PredHasWeights =
      extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
  bool SuccHasWeights =
      extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
  if (PredHasWeights || SuccHasWeights) {
    if (!PredHasWeights)
      PredTrueWeight = PredFalseWeight = 1;
    if (!SuccHasWeights)
      SuccTrueWeight = SuccFalseWeight = 1;
    return true;
  } else {
    return false;
  }
}

/// Determine if the two branches share a common destination and deduce the
/// logical operation that joins the branches' conditions to arrive at the
/// common destination, if doing so would be profitable.
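/// For example (illustrative): given
///   PBI: br i1 %a, label %S, label %X
///   BI:  br i1 %b, label %S, label %Y
/// both branches reach %S when their condition is true, so this returns
/// {%S, Instruction::Or, false}: branching on (%a || %b) reaches %S.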
static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
                                          const TargetTransformInfo *TTI) {
  assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
         "Both blocks must end with conditional branches.");
  assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
         "PredBB must be a predecessor of BB.");

  // We have the potential to fold the conditions together, but if the
  // predecessor branch is predictable, we may not want to merge them.
  uint64_t PTWeight, PFWeight;
  BranchProbability PBITrueProb, Likely;
  if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
      extractBranchWeights(*PBI, PTWeight, PFWeight) &&
      (PTWeight + PFWeight) != 0) {
    PBITrueProb =
        BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
    Likely = TTI->getPredictableBranchThreshold();
  }

  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
    // Speculate the 2nd condition unless the 1st is probably true.
    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
      return {{BI->getSuccessor(0), Instruction::Or, false}};
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
    // Speculate the 2nd condition unless the 1st is probably false.
    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
      return {{BI->getSuccessor(1), Instruction::And, false}};
  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
    // Speculate the 2nd condition unless the 1st is probably true.
    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
      return {{BI->getSuccessor(1), Instruction::And, true}};
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
    // Speculate the 2nd condition unless the 1st is probably false.
    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
      return {{BI->getSuccessor(0), Instruction::Or, true}};
  }
  return std::nullopt;
}

static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
                                             DomTreeUpdater *DTU,
                                             MemorySSAUpdater *MSSAU,
                                             const TargetTransformInfo *TTI) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *PredBlock = PBI->getParent();

  // Determine if the two branches share a common destination.
  BasicBlock *CommonSucc;
  Instruction::BinaryOps Opc;
  bool InvertPredCond;
  std::tie(CommonSucc, Opc, InvertPredCond) =
      *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);

  LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);

  IRBuilder<> Builder(PBI);
  // The builder is used to create instructions to eliminate the branch in BB.
  // If BB's terminator has !annotation metadata, add it to the new
  // instructions.
  Builder.CollectMetadataToCopy(BB->getTerminator(),
                                {LLVMContext::MD_annotation});

  // If we need to invert the condition in the pred block to match, do so now.
  if (InvertPredCond) {
    Value *NewCond = PBI->getCondition();
    if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
      CmpInst *CI = cast<CmpInst>(NewCond);
      CI->setPredicate(CI->getInversePredicate());
    } else {
      NewCond =
          Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
    }

    PBI->setCondition(NewCond);
    PBI->swapSuccessors();
  }

  BasicBlock *UniqueSucc =
      PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);

  // Before cloning instructions, notify the successor basic block that it
  // is about to have a new predecessor. This will update PHI nodes,
  // which will allow us to update live-out uses of bonus instructions.
  AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);

  // Try to update branch weights.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight)) {
    SmallVector<uint64_t, 8> NewWeights;

    if (PBI->getSuccessor(0) == BB) {
      // PBI: br i1 %x, BB, FalseDest
      // BI:  br i1 %y, UniqueSucc, FalseDest
      // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
      NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
      //               TrueWeight for PBI * FalseWeight for BI.
      // We assume that total weights of a BranchInst can fit into 32 bits.
      // Therefore, we will not have overflow using 64-bit arithmetic.
      NewWeights.push_back(PredFalseWeight *
                               (SuccFalseWeight + SuccTrueWeight) +
                           PredTrueWeight * SuccFalseWeight);
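      // For instance (illustrative numbers): with Pred = 3:1 and Succ = 5:3,
      // the folded branch gets weights {3*5, 1*(5+3) + 3*3} = {15, 17}.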
    } else {
      // PBI: br i1 %x, TrueDest, BB
      // BI:  br i1 %y, TrueDest, UniqueSucc
      // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
      //              FalseWeight for PBI * TrueWeight for BI.
      NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
                           PredFalseWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
      NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
    }

    // Halve the weights if any of them cannot fit in a uint32_t.
    FitWeights(NewWeights);

    SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
    setBranchWeights(PBI, MDWeights[0], MDWeights[1]);

    // TODO: If BB is reachable from all paths through PredBlock, then we
    // could replace PBI's branch probabilities with BI's.
  } else
    PBI->setMetadata(LLVMContext::MD_prof, nullptr);

  // Now, update the CFG.
  PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);

  if (DTU)
    DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
                       {DominatorTree::Delete, PredBlock, BB}});

  // If BI was a loop latch, it may have had associated loop metadata.
  // We need to copy it to the new latch, that is, PBI.
  if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
    PBI->setMetadata(LLVMContext::MD_loop, LoopMD);

  ValueToValueMapTy VMap; // maps original values to cloned values
  CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);

  // Now that Cond has been cloned into the predecessor basic block,
  // 'or'/'and' the two conditions together.
  Value *BICond = VMap[BI->getCondition()];
  PBI->setCondition(
      createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));

  // Copy any debug value intrinsics into the end of PredBlock.
  for (Instruction &I : *BB) {
    if (isa<DbgInfoIntrinsic>(I)) {
      Instruction *NewI = I.clone();
      RemapInstruction(NewI, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
      NewI->insertBefore(PBI);
    }
  }

  ++NumFoldBranchToCommonDest;
  return true;
}

/// Return true if an instruction's type or any of its operands' types is a
/// vector type.
static bool isVectorOp(Instruction &I) {
  return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
           return U->getType()->isVectorTy();
         });
}

/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
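/// For example (illustrative):
///   pred:
///     br i1 %c1, label %bb, label %common
///   bb:
///     br i1 %c2, label %tgt, label %common
/// can be folded (conceptually) into:
///   pred:
///     %or.cond = select i1 %c1, i1 %c2, i1 false   ; logical 'and'
///     br i1 %or.cond, label %tgt, label %common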
bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
                                  MemorySSAUpdater *MSSAU,
                                  const TargetTransformInfo *TTI,
                                  unsigned BonusInstThreshold) {
  // If this block ends with an unconditional branch,
  // let SpeculativelyExecuteBB() deal with it.
  if (!BI->isConditional())
    return false;

  BasicBlock *BB = BI->getParent();
  TargetTransformInfo::TargetCostKind CostKind =
      BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
                                    : TargetTransformInfo::TCK_SizeAndLatency;

  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());

  if (!Cond ||
      (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
       !isa<SelectInst>(Cond)) ||
      Cond->getParent() != BB || !Cond->hasOneUse())
    return false;

  // Finally, don't infinitely unroll conditional loops.
  if (is_contained(successors(BB), BB))
    return false;

  // Which predecessors do we want to deal with?
  SmallVector<BasicBlock *, 8> Preds;
  for (BasicBlock *PredBlock : predecessors(BB)) {
    BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());

    // Check that we have two conditional branches. If there is a PHI node in
    // the common successor, verify that the same value flows in from both
    // blocks.
    if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
      continue;

    // Determine if the two branches share a common destination.
    BasicBlock *CommonSucc;
    Instruction::BinaryOps Opc;
    bool InvertPredCond;
    if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
      std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
    else
      continue;

    // Check the cost of inserting the necessary logic before performing the
    // transformation.
    if (TTI) {
      Type *Ty = BI->getCondition()->getType();
      InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
      if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
                             !isa<CmpInst>(PBI->getCondition())))
        Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);

      if (Cost > BranchFoldThreshold)
        continue;
    }

    // Ok, we do want to deal with this predecessor. Record it.
    Preds.emplace_back(PredBlock);
  }

  // If there aren't any predecessors into which we can fold,
  // don't bother checking the cost.
  if (Preds.empty())
    return false;

  // Only allow this transformation if computing the condition doesn't involve
  // too many instructions and these involved instructions can be executed
  // unconditionally. We denote all involved instructions except the condition
  // as "bonus instructions", and only allow this transformation when the
  // number of the bonus instructions we'll need to create when cloning into
  // each predecessor does not exceed a certain threshold.
  unsigned NumBonusInsts = 0;
  bool SawVectorOp = false;
  const unsigned PredCount = Preds.size();
  for (Instruction &I : *BB) {
    // Don't check the branch condition comparison itself.
    if (&I == Cond)
      continue;
    // Ignore dbg intrinsics, and the terminator.
    if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
      continue;
    // I must be safe to execute unconditionally.
    if (!isSafeToSpeculativelyExecute(&I))
      return false;
    SawVectorOp |= isVectorOp(I);

    // Account for the cost of duplicating this instruction into each
    // predecessor. Ignore free instructions.
    if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
                    TargetTransformInfo::TCC_Free) {
      NumBonusInsts += PredCount;

      // Exit early once we reach the limit.
      if (NumBonusInsts >
          BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
        return false;
    }

    auto IsBCSSAUse = [BB, &I](Use &U) {
      auto *UI = cast<Instruction>(U.getUser());
      if (auto *PN = dyn_cast<PHINode>(UI))
        return PN->getIncomingBlock(U) == BB;
      return UI->getParent() == BB && I.comesBefore(UI);
    };

    // Does this instruction require rewriting of uses?
    if (!all_of(I.uses(), IsBCSSAUse))
      return false;
  }
  if (NumBonusInsts >
      BonusInstThreshold *
          (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
    return false;

  // Ok, we have the budget. Perform the transformation.
  for (BasicBlock *PredBlock : Preds) {
    auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
    return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
  }
  return false;
}

// If there is only one store in BB1 and BB2, return it, otherwise return
// nullptr.
static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
  StoreInst *S = nullptr;
  for (auto *BB : {BB1, BB2}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (auto *SI = dyn_cast<StoreInst>(&I)) {
        if (S)
          // Multiple stores seen.
          return nullptr;
        else
          S = SI;
      }
  }
  return S;
}

static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
                                              Value *AlternativeV = nullptr) {
  // PHI is going to be a PHI node that allows the value V that is defined in
  // BB to be referenced in BB's only successor.
  //
  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
  // doesn't matter to us what the other operand is (it'll never get used). We
  // could just create a new PHI with an undef incoming value, but that could
  // increase register pressure if EarlyCSE/InstCombine can't fold it with
  // some other PHI. So here we directly look for some PHI in BB's successor
  // with V as an incoming operand. If we find one, we use it, else we create
  // a new one.
  //
  // If AlternativeV is not nullptr, we care about both incoming values in
  // PHI. PHI must be exactly:
  //   phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ]
  // where OtherBB is the single other predecessor of BB's only successor.
  PHINode *PHI = nullptr;
  BasicBlock *Succ = BB->getSingleSuccessor();
  for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
    if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
      PHI = cast<PHINode>(I);
      if (!AlternativeV)
        break;

      assert(Succ->hasNPredecessors(2));
      auto PredI = pred_begin(Succ);
      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
      if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
        break;
      PHI = nullptr;
    }
  if (PHI)
    return PHI;

  // If V is not an instruction defined in BB, just return it.
  if (!AlternativeV &&
      (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
    return V;

  PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
  PHI->addIncoming(V, BB);
  for (BasicBlock *PredBB : predecessors(Succ))
    if (PredBB != BB)
      PHI->addIncoming(
          AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB);
  return PHI;
}

static bool mergeConditionalStoreToAddress(
    BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
    BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
    DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
  // For every pointer, there must be exactly two stores, one coming from
  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
  // store (to any address) in PTB,PFB or QTB,QFB.
  // FIXME: We could relax this restriction with a bit more work and
  // performance testing.
  StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
  StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
  if (!PStore || !QStore)
    return false;

  // Now check the stores are compatible.
  if (!QStore->isUnordered() || !PStore->isUnordered() ||
      PStore->getValueOperand()->getType() !=
          QStore->getValueOperand()->getType())
    return false;

  // Check that sinking the store won't cause program behavior changes.
  // Sinking the store out of the Q blocks won't change any behavior as we're
  // sinking from a block to its unconditional successor. But we're moving a
  // store from the P blocks down through the middle block (QBI) and past both
  // QFB and QTB. So we need to check that there are no aliasing loads or
  // stores in QBI, QTB and QFB. We also need to check there are no
  // conflicting memory operations between PStore and the end of its parent
  // block.
  //
  // The ideal way to do this is to query AliasAnalysis, but we don't
  // preserve AA currently so that is dangerous. Be super safe and just
  // check there are no other memory operations at all.
  for (auto &I : *QFB->getSinglePredecessor())
    if (I.mayReadOrWriteMemory())
      return false;
  for (auto &I : *QFB)
    if (&I != QStore && I.mayReadOrWriteMemory())
      return false;
  if (QTB)
    for (auto &I : *QTB)
      if (&I != QStore && I.mayReadOrWriteMemory())
        return false;
  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
       I != E; ++I)
    if (&*I != PStore && I->mayReadOrWriteMemory())
      return false;

  // If we're not in aggressive mode, we only optimize if we have some
  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
  auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
    if (!BB)
      return true;
    // Heuristic: if the block can be if-converted/phi-folded and the
    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
    // thread this store.
    InstructionCost Cost = 0;
    InstructionCost Budget =
        PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
    for (auto &I : BB->instructionsWithoutDebug(false)) {
      // Consider terminator instruction to be free.
      if (I.isTerminator())
        continue;
      // If this is one of the stores that we want to speculate out of this
      // BB, then don't count its cost, consider it to be free.
      if (auto *S = dyn_cast<StoreInst>(&I))
        if (llvm::is_contained(FreeStores, S))
          continue;
      // Else, we have a whitelist of instructions that we are okay
      // speculating.
      if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
        return false; // Not in whitelist - not worthwhile folding.
      // And finally, if this is a non-free instruction that we are okay
      // speculating, ensure that we consider the speculation budget.
      Cost +=
          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
      if (Cost > Budget)
        return false; // Eagerly refuse to fold as soon as we're out of budget.
    }
    assert(Cost <= Budget &&
           "When we run out of budget we will eagerly return from within the "
           "per-instruction loop.");
    return true;
  };

  const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
  if (!MergeCondStoresAggressively &&
      (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
       !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
    return false;

  // If PostBB has more than two predecessors, we need to split it so we can
  // sink the store.
  if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
    // We know that QFB's only successor is PostBB. And QFB has a single
    // predecessor. If QTB exists, then its only successor is also PostBB.
    // If QTB does not exist, then QFB's only predecessor has a conditional
    // branch to QFB and PostBB.
    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
    BasicBlock *NewBB =
        SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
    if (!NewBB)
      return false;
    PostBB = NewBB;
  }

  // OK, we're going to sink the stores to PostBB. The store has to be
  // conditional though, so first create the predicate.
  Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
                     ->getCondition();
  Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
                     ->getCondition();

  Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
                                                PStore->getParent());
  Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
                                                QStore->getParent(), PPHI);

  IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
  Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
  Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);

  if (InvertPCond)
    PPred = QB.CreateNot(PPred);
  if (InvertQCond)
    QPred = QB.CreateNot(QPred);
  Value *CombinedPred = QB.CreateOr(PPred, QPred);
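  // For instance (illustrative): if PStore lives in PTB and QStore in QFB,
  // with no extra inversions, the merged store below runs exactly when
  // %pcond | ~%qcond holds, i.e. whenever either original store ran.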
  auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(),
                                      /*Unreachable=*/false,
                                      /*BranchWeights=*/nullptr, DTU);
  QB.SetInsertPoint(T);
  StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
  SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
  // Choose the minimum alignment. If we could prove both stores execute, we
  // could use biggest one. In this case, though, we only know that one of the
  // stores executes. And we don't know it's safe to take the alignment from a
  // store that doesn't execute.
  SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));

  QStore->eraseFromParent();
  PStore->eraseFromParent();

  return true;
}

static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
                                   DomTreeUpdater *DTU, const DataLayout &DL,
                                   const TargetTransformInfo &TTI) {
  // The intention here is to find diamonds or triangles (see below) where
  // each conditional block contains a store to the same address. Both of
  // these stores are conditional, so they can't be unconditionally sunk. But
  // it may be profitable to speculatively sink the stores into one merged
  // store at the end, and predicate the merged store on the union of the two
  // conditions of PBI and QBI.
  //
  // This can reduce the number of stores executed if both of the conditions
  // are true, and can allow the blocks to become small enough to be
  // if-converted. This optimization will also chain, so that ladders of
  // test-and-set sequences can be if-converted away.
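  //
  // For example (illustrative, in C-like pseudocode):
  //   if (a) *p = 1;
  //   if (b) *p = 2;
  // becomes, conceptually:
  //   if (a | b) *p = b ? 2 : 1;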
  //
  // We only deal with simple diamonds or triangles:
  //
  //     PBI       or       PBI       or a combination of the two
  //    /   \               | \
  //  PTB    PFB            |  PFB
  //    \   /               | /
  //     QBI                QBI
  //    /   \               | \
  //  QTB    QFB            |  QFB
  //    \   /               | /
  //    PostBB             PostBB
  //
  // We model triangles as a type of diamond with a nullptr "true" block.
  // Triangles are canonicalized so that the fallthrough edge is represented
  // by a true condition, as in the diagram above.
  BasicBlock *PTB = PBI->getSuccessor(0);
  BasicBlock *PFB = PBI->getSuccessor(1);
  BasicBlock *QTB = QBI->getSuccessor(0);
  BasicBlock *QFB = QBI->getSuccessor(1);
  BasicBlock *PostBB = QFB->getSingleSuccessor();

  // Make sure we have a good guess for PostBB. If QTB's only successor is
  // QFB, then QFB is a better PostBB.
  if (QTB->getSingleSuccessor() == QFB)
    PostBB = QFB;

  // If we couldn't find a good PostBB, stop.
  if (!PostBB)
    return false;

  bool InvertPCond = false, InvertQCond = false;
  // Canonicalize fallthroughs to the true branches.
  if (PFB == QBI->getParent()) {
    std::swap(PFB, PTB);
    InvertPCond = true;
  }
  if (QFB == PostBB) {
    std::swap(QFB, QTB);
    InvertQCond = true;
  }

  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
  // and QFB may not. Model fallthroughs as a nullptr block.
  if (PTB == QBI->getParent())
    PTB = nullptr;
  if (QTB == PostBB)
    QTB = nullptr;

  // Legality bailouts. We must have at least the non-fallthrough blocks and
  // the post-dominating block, and the non-fallthroughs must only have one
  // predecessor.
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
  };
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
    return false;
  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
    return false;
  if (!QBI->getParent()->hasNUses(2))
    return false;

  // OK, this is a sequence of two diamonds or triangles.
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
  for (auto *BB : {PTB, PFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
        PStoreAddresses.insert(SI->getPointerOperand());
  }
  for (auto *BB : {QTB, QFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
        QStoreAddresses.insert(SI->getPointerOperand());
  }

  set_intersect(PStoreAddresses, QStoreAddresses);
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
  // clear what it contains.
  auto &CommonAddresses = PStoreAddresses;

  bool Changed = false;
  for (auto *Address : CommonAddresses)
    Changed |=
        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
                                       InvertPCond, InvertQCond, DTU, DL, TTI);
  return Changed;
}

/// If the previous block ended with a widenable branch, determine if reusing
/// the target block is profitable and legal. This will have the effect of
/// "widening" PBI, but doesn't require us to reason about hoisting safety.
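/// A widenable branch has the following shape (illustrative):
///   %wc = call i1 @llvm.experimental.widenable.condition()
///   %c  = and i1 %cond, %wc
///   br i1 %c, label %IfTrueBB, label %IfFalseBB
/// parseWidenableBranch recognizes this pattern; we can then redirect BI's
/// deoptimizing edge to IfFalseBB, effectively widening PBI.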
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
                                           DomTreeUpdater *DTU) {
  // TODO: This can be generalized in two important ways:
  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
  //    values from the PBI edge.
  // 2) We can sink side effecting instructions into BI's fallthrough
  //    successor provided they don't contribute to computation of
  //    BI's condition.
  Value *CondWB, *WC;
  BasicBlock *IfTrueBB, *IfFalseBB;
  if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) ||
      IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor())
    return false;
  if (!IfFalseBB->phis().empty())
    return false; // TODO

  // This helps avoid an infinite loop with SimplifyCondBranchToCondBranch,
  // which may undo the transform done here.
  // TODO: There might be a more fine-grained solution to this.
  if (!llvm::succ_empty(IfFalseBB))
    return false;

  // Use a lambda to lazily compute the expensive condition after the cheap
  // ones.
  auto NoSideEffects = [](BasicBlock &BB) {
    return llvm::none_of(BB, [](const Instruction &I) {
      return I.mayWriteToMemory() || I.mayHaveSideEffects();
    });
  };
  if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
      BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(1);
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(1, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
      BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(0);
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(0, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  return false;
}

/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
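/// For example (illustrative): given
///   PBI: br i1 %c, label %BB, label %X
///   BI (in %BB, with %BB's single predecessor being PBI's block):
///        br i1 %c, label %Y, label %Z
/// reaching %BB means %c was true, so BI can be rewritten to branch on the
/// constant true (and later folded to an unconditional branch to %Y).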
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
                                           DomTreeUpdater *DTU,
                                           const DataLayout &DL,
                                           const TargetTransformInfo &TTI) {
  assert(PBI->isConditional() && BI->isConditional());
  BasicBlock *BB = BI->getParent();

  // If this block ends with a branch instruction, and if there is a
  // predecessor that ends on a branch of the same condition, make
  // this conditional branch redundant.
  if (PBI->getCondition() == BI->getCondition() &&
      PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
    // Okay, the outcome of this conditional branch is statically
    // knowable. If this block had a single pred, handle specially, otherwise
    // FoldCondBranchOnValueKnownInPredecessor() will handle it.
    if (BB->getSinglePredecessor()) {
      // Turn this into a branch on constant.
      bool CondIsTrue = PBI->getSuccessor(0) == BB;
      BI->setCondition(
          ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
      return true; // Nuke the branch on constant.
    }
  }

  // If the previous block ended with a widenable branch, determine if reusing
  // the target block is profitable and legal. This will have the effect of
  // "widening" PBI, but doesn't require us to reason about hoisting safety.
  if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
    return true;

  // If both branches are conditional and both contain stores to the same
  // address, remove the stores from the conditionals and create a conditional
  // merged store at the end.
  if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
    return true;

  // If this is a conditional branch in an empty block, and if any
  // predecessors are a conditional branch to one of our destinations,
  // fold the conditions into logical ops and one cond br.
  // Ignore dbg intrinsics.
  if (&*BB->instructionsWithoutDebug(false).begin() != BI)
    return false;

  int PBIOp, BIOp;
  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
    PBIOp = 0;
    BIOp = 0;
  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
    PBIOp = 0;
    BIOp = 1;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
    PBIOp = 1;
    BIOp = 0;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
    PBIOp = 1;
    BIOp = 1;
  } else {
    return false;
  }

  // Check to make sure that the other destination of this branch
  // isn't BB itself. If so, this is an infinite loop that will
  // keep getting unwound.
  if (PBI->getSuccessor(PBIOp) == BB)
    return false;

  // Do not perform this transformation if it would require
  // insertion of a large number of select instructions. For targets
  // without predication/cmovs, this is a big pessimization.
  BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
  BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
  unsigned NumPhis = 0;
  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
       ++II, ++NumPhis) {
    if (NumPhis > 2) // Disable this xform.
      return false;
  }

  // Finally, if everything is ok, fold the branches to logical ops.
  BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);

  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
                    << "AND: " << *BI->getParent());

  SmallVector<DominatorTree::UpdateType, 5> Updates;

  // If OtherDest *is* BB, then BB is a basic block with a single conditional
  // branch in it, where one edge (OtherDest) goes back to itself but the
  // other exits. We don't *know* that the program avoids the infinite loop
  // (even though that seems likely). If we do this xform naively, we'll end
  // up recursively unpeeling the loop. Since we know that (after the xform
  // is done) the block *is* infinite if reached, we just make it an obviously
  // infinite loop with no cond branch.
  if (OtherDest == BB) {
    // Insert it at the end of the function, because it's either code,
    // or it won't matter if it's hot. :)
    BasicBlock *InfLoopBlock =
        BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
    BranchInst::Create(InfLoopBlock, InfLoopBlock);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
    OtherDest = InfLoopBlock;
  }

  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // BI may have other predecessors. Because of this, we leave
  // it alone, but modify PBI.

  // Make sure we get to CommonDest on True&True directions.
  Value *PBICond = PBI->getCondition();
  IRBuilder<NoFolder> Builder(PBI);
  if (PBIOp)
    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");

  Value *BICond = BI->getCondition();
  if (BIOp)
    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");

  // Merge the conditions.
  Value *Cond =
      createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");

  // Modify PBI to branch on the new condition to the new dests.
  PBI->setCondition(Cond);
  PBI->setSuccessor(0, CommonDest);
  PBI->setSuccessor(1, OtherDest);

  if (DTU) {
    Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
    Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});

    DTU->applyUpdates(Updates);
  }

  // Update branch weight for PBI.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
  bool HasWeights =
      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight);
  if (HasWeights) {
    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
    // The weight to CommonDest should be PredCommon * SuccTotal +
    //                                    PredOther * SuccCommon.
    // The weight to OtherDest should be PredOther * SuccOther.
    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
                                  PredOther * SuccCommon,
                              PredOther * SuccOther};
    // Halve the weights if any of them cannot fit in a uint32_t.
    FitWeights(NewWeights);

    setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
  }

  // OtherDest may have phi nodes. If so, add an entry from PBI's
  // block that is identical to the entries for BI's block.
  AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);

  // We know that the CommonDest already had an edge from PBI to
  // it. If it has PHIs though, the PHIs may have different
  // entries for BB and PBI's BB. If so, insert a select to make
  // them agree.
  for (PHINode &PN : CommonDest->phis()) {
    Value *BIV = PN.getIncomingValueForBlock(BB);
    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
    Value *PBIV = PN.getIncomingValue(PBBIdx);
    if (BIV != PBIV) {
      // Insert a select in PBI to pick the right value.
      SelectInst *NV = cast<SelectInst>(
          Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
      PN.setIncomingValue(PBBIdx, NV);
      // Although the select has the same condition as PBI, the original
      // branch weights for PBI do not apply to the new select because the
      // select's 'logical' edges are incoming edges of the phi that is
      // eliminated, not the outgoing edges of PBI.
      if (HasWeights) {
        uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
        uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
        uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
        uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
        // The weight to PredCommonDest should be PredCommon * SuccTotal.
        // The weight to PredOtherDest should be PredOther * SuccCommon.
        uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
                                  PredOther * SuccCommon};

        FitWeights(NewWeights);

        setBranchWeights(NV, NewWeights[0], NewWeights[1]);
      }
    }
  }

  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // This basic block is probably dead. We know it has at least
  // one fewer predecessor.
  return true;
}

// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
// true or to FalseBB if Cond is false.
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
                                                Value *Cond, BasicBlock *TrueBB,
                                                BasicBlock *FalseBB,
                                                uint32_t TrueWeight,
                                                uint32_t FalseWeight) {
  auto *BB = OldTerm->getParent();
  // Remove any superfluous successor edges from the CFG.
  // First, figure out which successors to preserve.
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
  // successor.
  BasicBlock *KeepEdge1 = TrueBB;
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;

  SmallSetVector<BasicBlock *, 2> RemovedSuccessors;

  // Then remove the rest.
  for (BasicBlock *Succ : successors(OldTerm)) {
    // Make sure only to keep exactly one copy of each edge.
    if (Succ == KeepEdge1)
      KeepEdge1 = nullptr;
    else if (Succ == KeepEdge2)
      KeepEdge2 = nullptr;
    else {
      Succ->removePredecessor(BB,
                              /*KeepOneInputPHIs=*/true);

      if (Succ != TrueBB && Succ != FalseBB)
        RemovedSuccessors.insert(Succ);
    }
  }

  IRBuilder<> Builder(OldTerm);
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());

  // Insert an appropriate new terminator.
  if (!KeepEdge1 && !KeepEdge2) {
    if (TrueBB == FalseBB) {
      // We were only looking for one successor, and it was present.
      // Create an unconditional branch to it.
      Builder.CreateBr(TrueBB);
    } else {
      // We found both of the successors we were looking for.
      // Create a conditional branch sharing the condition of the select.
      BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
      if (TrueWeight != FalseWeight)
        setBranchWeights(NewBI, TrueWeight, FalseWeight);
    }
  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
    // Neither of the selected blocks were successors, so this
    // terminator must be unreachable.
    new UnreachableInst(OldTerm->getContext(), OldTerm);
  } else {
    // One of the selected values was a successor, but the other wasn't.
    // Insert an unconditional branch to the one that was found;
    // the edge to the one that wasn't must be unreachable.
    if (!KeepEdge1) {
      // Only TrueBB was found.
      Builder.CreateBr(TrueBB);
    } else {
      // Only FalseBB was found.
      Builder.CreateBr(FalseBB);
    }
  }

  EraseTerminatorAndDCECond(OldTerm);

  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccessors.size());
    for (auto *RemovedSuccessor : RemovedSuccessors)
      Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
    DTU->applyUpdates(Updates);
  }

  return true;
}

// Replaces
//   (switch (select cond, X, Y)) on constant X, Y
// with a branch - conditional if X and Y lead to distinct BBs,
// unconditional otherwise.
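// For example (illustrative):
//   switch i32 (select i1 %c, i32 1, i32 4), label %dflt [ i32 1, label %a
//                                                          i32 4, label %b ]
// becomes:
//   br i1 %c, label %a, label %b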
bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
                                            SelectInst *Select) {
  // Check for constant integer values in the select.
  ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
  ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
  if (!TrueVal || !FalseVal)
    return false;

  // Find the relevant condition and destinations.
  Value *Condition = Select->getCondition();
  BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
  BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();

  // Get weight for TrueBB and FalseBB.
  uint32_t TrueWeight = 0, FalseWeight = 0;
  SmallVector<uint64_t, 8> Weights;
  bool HasWeights = hasBranchWeightMD(*SI);
  if (HasWeights) {
    GetBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      TrueWeight =
          (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
      FalseWeight =
          (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
    }
  }

  // Perform the actual simplification.
  return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
                                    FalseWeight);
}

// Replaces
//   (indirectbr (select cond, blockaddress(@fn, BlockA),
//                             blockaddress(@fn, BlockB)))
// with
//   (br cond, BlockA, BlockB).
bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
                                                SelectInst *SI) {
  // Check that both operands of the select are block addresses.
  BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
  BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
  if (!TBA || !FBA)
    return false;

  // Extract the actual blocks.
  BasicBlock *TrueBB = TBA->getBasicBlock();
  BasicBlock *FalseBB = FBA->getBasicBlock();

  // Perform the actual simplification.
  return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
                                    0);
}

/// This is called when we find an icmp instruction
/// (a seteq/setne with a constant) as the only instruction in a
/// block that ends with an uncond branch. We are looking for a very specific
/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
/// this case, we merge the first two "or's of icmp" into a switch, but then
/// the default value goes to an uncond block with a seteq in it; we get
/// something like:
///
///   switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
/// DEFAULT:
///   %tmp = icmp eq i8 %A, 92
///   br label %end
/// end:
///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
///
/// We prefer to split the edge to 'end' so that there is a true/false entry
/// to the PHI, merging the third icmp into the switch.
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
    ICmpInst *ICI, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp has multiple uses, it is too
  // complex.
  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
    return false;

  Value *V = ICI->getOperand(0);
  ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));

  // The pattern we're looking for is where our only predecessor is a switch
  // on 'V' and this block is the default case for the switch. In this case
  // we can fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
    return false;

  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
  if (SI->getCondition() != V)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value
  // of V in this block. Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value");
    ICI->setOperand(0, VVal);

    if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest. If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it.
  if (SI->findCaseValue(Cst) != SI->case_default()) {
    Value *V;
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      V = ConstantInt::getFalse(BB->getContext());
    else
      V = ConstantInt::getTrue(BB->getContext());

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the icmp has to be in the 'end' block, by the only PHI node in
  // the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
  PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
      isa<PHINode>(++BasicBlock::iterator(PHIUse)))
    return false;

  // If the icmp is a SETEQ, then the default dest gets false, the new edge
  // gets true in the PHI.
  Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
  Constant *NewCst = ConstantInt::getFalse(BB->getContext());

  if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
    std::swap(DefaultCst, NewCst);

  // Replace ICI (which is used by the PHI for the default value) with true or
  // false depending on if it is EQ or NE.
  ICI->replaceAllUsesWith(DefaultCst);
  ICI->eraseFromParent();

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // Okay, the switch goes to this block on a default value. Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
  {
    SwitchInstProfUpdateWrapper SIW(*SI);
    auto W0 = SIW.getSuccessorWeight(0);
    SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
    if (W0) {
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(0, *NewW);
    }
    SIW.addCase(Cst, NewBB, NewW);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(SuccBlock);
  PHIUse->addIncoming(NewCst, NewBB);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}

/// The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions,
/// and fold it into a switch instruction if so.
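/// For example (illustrative):
///   %a = icmp eq i32 %x, 0
///   %b = icmp eq i32 %x, 1
///   %or = or i1 %a, %b
///   br i1 %or, label %T, label %F
/// becomes:
///   switch i32 %x, label %F [ i32 0, label %T
///                             i32 1, label %T ]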
bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch.
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result.
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;

  // If we didn't have a multiply-compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle; remove them now.
  array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
  Values.erase(std::unique(Values.begin(), Values.end()), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  // TODO: Preserve branch weight metadata, similarly to how
  // FoldValueComparisonIntoPredecessors preserves it.

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  if (!TrueWhenEqual)
    std::swap(DefaultBB, EdgeBB);

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH. BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be unintended UB if the extra values are poison. Before the
    // transformation, the extra values may not be evaluated according to the
    // condition, and so will not raise UB. But after the transformation, we
    // are evaluating the extra values before checking the condition, which
    // can raise UB. This can be solved by adding a freeze instruction to the
    // extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    if (TrueWhenEqual)
      Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
    else
      Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to
    // them for the edge we just added.
    AddPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Create the new switch instruction now.
  SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());

  // Add all of the 'cases' to the switch instruction.
  for (unsigned i = 0, e = Values.size(); i != e; ++i)
    New->addCase(Values[i], EdgeBB);

  // We added edges from BB to the EdgeBB. As such, if there were any
  // PHI nodes in EdgeBB, they need entries to be added corresponding to
  // the number of edges added.
  for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
    PHINode *PN = cast<PHINode>(BBI);
    Value *InVal = PN->getIncomingValueForBlock(BB);
    for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
      PN->addIncoming(InVal, BB);
  }

  // Erase the old branch instruction.
  EraseTerminatorAndDCECond(BI);
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}

bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
  if (isa<PHINode>(RI->getValue()))
    return simplifyCommonResume(RI);
  else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
           RI->getValue() == RI->getParent()->getFirstNonPHI())
    // The resume must unwind the exception that caused control to branch
    // here.
    return simplifySingleResume(RI);

  return false;
}
  4223. // Check if cleanup block is empty
  4224. static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
  4225. for (Instruction &I : R) {
  4226. auto *II = dyn_cast<IntrinsicInst>(&I);
  4227. if (!II)
  4228. return false;
  4229. Intrinsic::ID IntrinsicID = II->getIntrinsicID();
  4230. switch (IntrinsicID) {
  4231. case Intrinsic::dbg_declare:
  4232. case Intrinsic::dbg_value:
  4233. case Intrinsic::dbg_label:
  4234. case Intrinsic::lifetime_end:
  4235. break;
  4236. default:
  4237. return false;
  4238. }
  4239. }
  4240. return true;
  4241. }
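// For example (illustrative), a landing-pad block such as
//   %lp = landingpad { ptr, i32 } cleanup
//   call void @llvm.dbg.value(...)
//   call void @llvm.lifetime.end.p0(i64 8, ptr %slot)
//   resume { ptr, i32 } %lp
// is considered empty: only debug and lifetime intrinsics appear between the
// landingpad and the terminator.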
// Simplify resume that is shared by several landing pads (phi of landing pad).
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(
          make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we cannot delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
    // Not the landing pad that caused control to branch here.
    if (IncomingValue != LandingPad)
      continue;

    if (isCleanupBlockEmpty(
            make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    for (BasicBlock *Pred :
         llvm::make_early_inc_range(predecessors(TrivialBB))) {
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  return !TrivialUnwindBlocks.empty();
}

// Simplify resume that is only used by a single (non-phi) landing pad.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to branch here");

  // Check that there are no other instructions except for debug intrinsics.
  if (!isCleanupBlockEmpty(
          make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}
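// Illustrative sketch of the rewrite performed by removeUnwindEdge: a
// predecessor such as
//   invoke void @f() to label %cont unwind label %lpad
// roughly becomes
//   call void @f()
//   br label %cont
// after which the landing pad has no predecessors and can be deleted.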
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its
  // invoke instruction converted to a call instruction. If the cleanup pad
  // being simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  if (!isCleanupBlockEmpty(
          make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing.
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    Instruction *InsertPt = DestEHPad;
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  for (BasicBlock *PredBB : llvm::make_early_inc_range(predecessors(BB))) {
    if (UnwindDest == nullptr) {
      if (DTU) {
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
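// For example (illustrative), a block consisting solely of
//   %pad = cleanuppad within none []
//   cleanupret from %pad unwind label %next
// is removed by redirecting every predecessor's unwind edge straight to
// %next (or, when the cleanupret unwinds to the caller, by turning the
// predecessors' invokes into calls).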
// Try to merge two cleanuppads together.
static bool mergeCleanupPad(CleanupReturnInst *RI) {
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // If this cleanupret isn't the only predecessor of this cleanuppad, it
  // wouldn't be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanuppad with the predecessor pad.
  // The only uses of the successor cleanuppad should be its own cleanupret
  // and funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
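// Illustrative sketch of the merge:
//   pad1:
//     %p1 = cleanuppad within none []
//     cleanupret from %p1 unwind label %pad2
//   pad2:
//     %p2 = cleanuppad within none []
//     ...
// %p2 is replaced by %p1, the cleanuppad in %pad2 is erased, and the
// cleanupret in %pad1 becomes 'br label %pad2'.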
bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
  // It is possible to transiently have an undef cleanuppad operand because we
  // have deleted some, but not all, dead blocks.
  // Eventually, this block will be deleted.
  if (isa<UndefValue>(RI->getOperand(0)))
    return false;

  if (mergeCleanupPad(RI))
    return true;

  if (removeEmptyCleanup(RI, DTU))
    return true;

  return false;
}

// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    BasicBlock::iterator BBI = UI->getIterator();
    --BBI;

    if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI))
      break; // Cannot drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // Delete this instruction (any uses are guaranteed to be dead).
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
    auto *Predecessor = Preds[i];
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
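      // A conditional branch where only one successor is BB is rewritten to
      // branch unconditionally to the other successor, with an assume on the
      // condition. For example (illustrative):
      //   br i1 %c, label %unreachable.bb, label %cont
      // becomes
      //   %c.not = xor i1 %c, true
      //   call void @llvm.assume(i1 %c.not)
      //   br label %cont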
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        new UnreachableInst(TI->getContext(), TI);
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value *Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        if (BI->getSuccessor(0) == BB) {
          Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        EraseTerminatorAndDCECond(BI);
        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI);
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI);
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}

static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
  assert(Cases.size() >= 1);

  array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
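  // After the sort the values are in descending order, so the contiguity
  // check below accepts e.g. {7, 6, 5} and rejects {7, 5}.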
  for (size_t I = 1, E = Cases.size(); I != E; ++I) {
    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
      return false;
  }
  return true;
}
static void createUnreachableSwitchDefault(SwitchInst *Switch,
                                           DomTreeUpdater *DTU) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  OrigDefaultBlock->removePredecessor(BB);
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    if (!is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}

/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());

  auto *BB = SI->getParent();

  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;
  SmallVector<ConstantInt *, 16> CasesA;
  SmallVector<ConstantInt *, 16> CasesB;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is
                  // unreachable.

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
  BasicBlock *ContiguousDest = nullptr;
  BasicBlock *OtherDest = nullptr;
  if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
    ContiguousCases = &CasesA;
    ContiguousDest = DestA;
    OtherDest = DestB;
  } else if (CasesAreContiguous(CasesB)) {
    ContiguousCases = &CasesB;
    ContiguousDest = DestB;
    OtherDest = DestA;
  } else
    return false;

  // Start building the compare and branch.

  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
  Constant *NumCases =
      ConstantInt::get(Offset->getType(), ContiguousCases->size());

  Value *Sub = SI->getCondition();
  if (!Offset->isNullValue())
    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");

  Value *Cmp;
  // If NumCases overflowed, then all possible values jump to the successor.
  if (NumCases->isNullValue() && !ContiguousCases->empty())
    Cmp = ConstantInt::getTrue(SI->getContext());
  else
    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI)) {
    SmallVector<uint64_t, 8> Weights;
    GetBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == ContiguousDest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setBranchWeights(NewBI, TrueWeight, FalseWeight);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = ContiguousCases->size();
    if (ContiguousDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }
  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)
    createUnreachableSwitchDefault(SI, DTU);

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
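// For example (illustrative):
//   switch i32 %x, label %other [ i32 5, label %dest
//                                 i32 6, label %dest
//                                 i32 7, label %dest ]
// becomes
//   %x.off = add i32 %x, -5
//   %switch = icmp ult i32 %x.off, 3
//   br i1 %switch, label %dest, label %other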
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);

  // We can also eliminate cases by determining that their values are outside
  // of the limited range of the condition based on how many significant
  // (non-sign) bits are in the condition value.
  unsigned MaxSignificantBitsInCond =
      ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);

  // Gather dead cases.
  SmallVector<ConstantInt *, 8> DeadCases;
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      if (!NumPerSuccessorCases.count(Successor))
        UniqueSuccessors.push_back(Successor);
      ++NumPerSuccessorCases[Successor];
    }
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
        (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
      DeadCases.push_back(Case.getCaseValue());
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    }
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).countPopulation();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty() &&
      NumUnknownBits < 64 /* avoid overflow */ &&
      SI->getNumCases() == (1ULL << NumUnknownBits)) {
    createUnreachableSwitchDefault(SI, DTU);
    return true;
  }

  if (DeadCases.empty())
    return false;

  SwitchInstProfUpdateWrapper SIW(*SI);
  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SIW.removeCase(CaseI);
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
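// For example (illustrative): if the low bit of the condition is known to be
// zero, a 'case 3' is dead because Known.Zero intersects the case value; and
// if the condition is known to have at most 4 significant bits, a 'case 100'
// is dead because it needs 8 signed bits, more than the condition can carry.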
/// If BB would be eligible for simplification by
/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
/// by an unconditional branch), look at the phi node for BB in the successor
/// block and see if the incoming value is equal to CaseValue. If so, return
/// the phi node, and set PhiIndex to BB's index in the phi node.
static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
                                              BasicBlock *BB, int *PhiIndex) {
  if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
    return nullptr; // BB must be empty to be a candidate for simplification.
  if (!BB->getSinglePredecessor())
    return nullptr; // BB must be dominated by the switch.

  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
  if (!Branch || !Branch->isUnconditional())
    return nullptr; // Terminator must be unconditional branch.

  BasicBlock *Succ = Branch->getSuccessor(0);

  for (PHINode &PHI : Succ->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB);
    assert(Idx >= 0 && "PHI has no entry for predecessor?");

    Value *InValue = PHI.getIncomingValue(Idx);
    if (InValue != CaseValue)
      continue;

    *PhiIndex = Idx;
    return &PHI;
  }

  return nullptr;
}
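// Illustrative example: 'case 17' jumps to an otherwise-empty block %bb that
// branches to %succ, where a phi has the entry [ 17, %bb ]. The phi and the
// index of that entry are returned so ForwardSwitchConditionToPHI can rewrite
// the 17 to the switch condition (once at least two such entries are found),
// letting %bb be folded away later.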
/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
/// blocks of the switch can be folded away. Return true if a change is made.
static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;

  ForwardingNodesMap ForwardingNodes;
  BasicBlock *SwitchBlock = SI->getParent();
  bool Changed = false;
  for (const auto &Case : SI->cases()) {
    ConstantInt *CaseValue = Case.getCaseValue();
    BasicBlock *CaseDest = Case.getCaseSuccessor();

    // Replace phi operands in successor blocks that are using the constant
    // case value rather than the switch condition variable:
    //   switchbb:
    //   switch i32 %x, label %default [
    //     i32 17, label %succ
    //   ...
    //   succ:
    //     %r = phi i32 ... [ 17, %switchbb ] ...
    // -->
    //     %r = phi i32 ... [ %x, %switchbb ] ...
    for (PHINode &Phi : CaseDest->phis()) {
      // This only works if there is exactly 1 incoming edge from the switch
      // to a phi. If there is >1, that means multiple cases of the switch
      // map to 1 value in the phi, and that phi value is not the switch
      // condition. Thus, this transform would not make sense (the phi would
      // be invalid because a phi can't have different incoming values from
      // the same block).
      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
          count(Phi.blocks(), SwitchBlock) == 1) {
        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
        Changed = true;
      }
    }

    // Collect phi nodes that are indirectly using this switch's case
    // constants.
    int PhiIdx;
    if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
      ForwardingNodes[Phi].push_back(PhiIdx);
  }

  for (auto &ForwardingNode : ForwardingNodes) {
    PHINode *Phi = ForwardingNode.first;
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
    if (Indexes.size() < 2)
      continue;

    for (int Index : Indexes)
      Phi->setIncomingValue(Index, SI->getCondition());
    Changed = true;
  }

  return Changed;
}

/// Return true if the backend will be able to handle
/// initializing an array of constants like C.
static bool ValidLookupTableConstant(Constant *C,
                                     const TargetTransformInfo &TTI) {
  if (C->isThreadDependent())
    return false;
  if (C->isDLLImportDependent())
    return false;

  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
      !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
      !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
    return false;

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
    // Pointer casts and in-bounds GEPs will not prohibit the backend from
    // materializing the array of constants.
    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
    if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI))
      return false;
  }

  if (!TTI.shouldBuildLookupTablesForConstant(C))
    return false;

  return true;
}

/// If V is a Constant, return it. Otherwise, try to look up
/// its constant value in ConstantPool, returning nullptr if it's not there.
static Constant *
LookupConstant(Value *V,
               const SmallDenseMap<Value *, Constant *> &ConstantPool) {
  if (Constant *C = dyn_cast<Constant>(V))
    return C;
  return ConstantPool.lookup(V);
}

/// Try to fold instruction I into a constant. This works for
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, nullptr otherwise.
static Constant *
ConstantFold(Instruction *I, const DataLayout &DL,
             const SmallDenseMap<Value *, Constant *> &ConstantPool) {
  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
    Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
    if (!A)
      return nullptr;
    if (A->isAllOnesValue())
      return LookupConstant(Select->getTrueValue(), ConstantPool);
    if (A->isNullValue())
      return LookupConstant(Select->getFalseValue(), ConstantPool);
    return nullptr;
  }

  SmallVector<Constant *, 4> COps;
  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
    if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
      COps.push_back(A);
    else
      return nullptr;
  }

  return ConstantFoldInstOperands(I, COps, DL);
}

/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (nullptr for the
/// default case), of a switch instruction SI.
static bool
getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
               BasicBlock **CommonDest,
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
               const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  SmallDenseMap<Value *, Constant *> ConstantPool;
  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(0);
    } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would
      // then no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (Instruction *I = dyn_cast<Instruction>(User))
          if (I->getParent() == CaseDest)
            continue;
        if (PHINode *Phi = dyn_cast<PHINode>(User))
          if (Phi->getIncomingBlock(Use) == CaseDest)
            continue;
        return false;
      }

      ConstantPool.insert(std::make_pair(&I, C));
    } else {
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!ValidLookupTableConstant(ConstVal, TTI))
      return false;

    Res.push_back(std::make_pair(&PHI, ConstVal));
  }

  return Res.size() > 0;
}

// Helper function used to add CaseVal to the list of cases that generate
// Result. Returns the updated number of cases that generate this result.
static size_t mapCaseToResult(ConstantInt *CaseVal,
                              SwitchCaseResultVectorTy &UniqueResults,
                              Constant *Result) {
  for (auto &I : UniqueResults) {
    if (I.first == Result) {
      I.second.push_back(CaseVal);
      return I.second.size();
    }
  }
  UniqueResults.push_back(
      std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
  return 1;
}

// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }

  // Find the default result value.
  SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
  BasicBlock *DefaultDest = SI->getDefaultDest();
  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
                 DL, TTI);

  // If the default value is not found, abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
  if ((!DefaultResult &&
       !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
    return false;

  return true;
}

// Helper function that checks if it is possible to transform a switch with
// only two cases (or two cases + default) that produces a result into a
// select.
// TODO: Handle switches with more than 2 cases that map to the same result.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  //   switch (a) {                  %0 = icmp eq i32 %a, 10
  //     case 10: return 42;         %1 = select i1 %0, i32 42, i32 4
  //     case 20: return 2;   ---->  %2 = icmp eq i32 %a, 20
  //     default: return 4;          %3 = select i1 %2, i32 2, i32 %1
  //   }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                SelectValue, "switch.select");
  }

  // Handle the degenerate case where two cases have the same result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // 2^n cases that map to the same result:
    //   case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
    //   case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
    //   case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // Find the minimal value.
      for (auto *Case : CaseValues)
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;

      // Mark the bits that are touched by the case values.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check whether the cases with the same result cover all values of the
      // touched bits.
      if (BitMask.countPopulation() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      }
    }

    // Handle the degenerate case where two cases have the same result value.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
    }
  }

  return nullptr;
}

// Helper function to clean up a switch instruction that has been converted
// into a select, fixing up PHI nodes and basic blocks.
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
                                        Value *SelectValue,
                                        IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU) {
  std::vector<DominatorTree::UpdateType> Updates;

  BasicBlock *SelectBB = SI->getParent();
  BasicBlock *DestBB = PHI->getParent();

  if (DTU && !is_contained(predecessors(DestBB), SelectBB))
    Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
  Builder.CreateBr(DestBB);

  // Remove the switch.

  while (PHI->getBasicBlockIndex(SelectBB) >= 0)
    PHI->removeIncomingValue(SelectBB);
  PHI->addIncoming(SelectValue, SelectBB);

  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(i);

    if (Succ == DestBB)
      continue;
    Succ->removePredecessor(SelectBB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
  }
  SI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);
}

/// If a switch is only used to initialize one or more phi nodes in a common
/// successor block with only two different constant values, try to replace the
/// switch with a select. Returns true if the fold was made.
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
                              DomTreeUpdater *DTU, const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  Value *const Cond = SI->getCondition();
  PHINode *PHI = nullptr;
  BasicBlock *CommonDest = nullptr;
  Constant *DefaultResult;
  SwitchCaseResultVectorTy UniqueResults;

  // Collect all the cases that will deliver the same value from the switch.
  if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
                             DL, TTI, /*MaxUniqueResults*/ 2))
    return false;

  assert(PHI != nullptr && "PHI for value select not found");
  Builder.SetInsertPoint(SI);
  Value *SelectValue =
      foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
  if (!SelectValue)
    return false;

  removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
  return true;
}

namespace {

/// This class represents a lookup table that can be used to replace a switch.
class SwitchLookupTable {
public:
  /// Create a lookup table to use as a switch replacement with the contents
  /// of Values, using DefaultValue to fill any holes in the table.
  SwitchLookupTable(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve the value at
  /// the position given by Index in the lookup table.
  Value *BuildLookup(Value *Index, IRBuilder<> &Builder);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

private:
  // Depending on the contents of the table, it can be represented in
  // different ways.
  enum {
    // For tables where each element contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For tables where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    ArrayKind
  } Kind;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;

  // For ArrayKind, this is the array.
  GlobalVariable *Array = nullptr;
};

} // end anonymous namespace

SwitchLookupTable::SwitchLookupTable(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  Type *ValueType = Values.begin()->second->getType();

  // Build up the table contents.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (size_t I = 0, E = Values.size(); I != E; ++I) {
    ConstantInt *CaseVal = Values[I].first;
    Constant *CaseRes = Values[I].second;
    assert(CaseRes->getType() == ValueType);

    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    if (CaseRes != SingleValue)
      SingleValue = nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    if (DefaultValue != SingleValue)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to
  // store that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
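  // For example (illustrative), the table {1, 3, 5, 7} becomes
  // '1 + 2 * Index', avoiding the table altogether.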
  if (isa<IntegerType>(ValueType)) {
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup
        // tables are very rare. It's probably not worth the additional
        // complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(TableContents[0]);
      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
      Kind = LinearMapKind;
      ++NumLinearMaps;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
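  // For example (illustrative), a 3-entry table of i2 values {1, 2, 3} packs
  // into the i6 bitmap 0b11'10'01; entry I is recovered later in BuildLookup
  // by shifting right by I * 2 and truncating to i2.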
  if (WouldFitInRegister(DL, TableSize, ValueType)) {
    IntegerType *IT = cast<IntegerType>(ValueType);
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(M.getContext(), TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    ++NumBitMaps;
    return;
  }

  // Store the table in an array.
  ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
  Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);

  Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
                             GlobalVariable::PrivateLinkage, Initializer,
                             "switch.table." + FuncName);
  Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  // Set the alignment to that of the array elements. We will only be loading
  // one value out of it.
  Array->setAlignment(DL.getPrefTypeAlign(ValueType));
  Kind = ArrayKind;
}

Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
  switch (Kind) {
  case SingleValueKind:
    return SingleValue;
  case LinearMapKind: {
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
    return Result;
  }
  case BitMapKind: {
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt");

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case ArrayKind: {
    // Make sure the table index will not overflow when treated as signed.
    IntegerType *IT = cast<IntegerType>(Index->getType());
    uint64_t TableSize =
        Array->getInitializer()->getType()->getArrayNumElements();
    if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
      Index = Builder.CreateZExt(
          Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
          "switch.tableidx.zext");

    Value *GEPIndices[] = {Builder.getInt32(0), Index};
    Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
                                           GEPIndices, "switch.gep");
    return Builder.CreateLoad(
        cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
        "switch.load");
  }
  }
  llvm_unreachable("Unknown lookup table kind!");
}

bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
                                           uint64_t TableSize,
                                           Type *ElementType) {
  auto *IT = dyn_cast<IntegerType>(ElementType);
  if (!IT)
    return false;
  // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
  // are <= 15, we could try to narrow the type.

  // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
  if (TableSize >= UINT_MAX / IT->getBitWidth())
    return false;
  return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
}
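// For example (illustrative, on a target whose widest legal integer is i64):
// an 8-entry table of i8 values needs 64 bits and fits in a register, so it
// can be packed as a bitmap, while a 64-entry i8 table (512 bits) must be
// emitted as an array.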
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
                                      const DataLayout &DL) {
  // Allow any legal type.
  if (TTI.isTypeLegal(Ty))
    return true;

  auto *IT = dyn_cast<IntegerType>(Ty);
  if (!IT)
    return false;

  // Also allow power of 2 integer types that have at least 8 bits and fit in
  // a register. These types are common in frontend languages and targets
  // usually support loads of these types.
  // TODO: We could relax this to any integer that fits in a register and rely
  // on ABI alignment and padding in the table to allow the load to be widened.
  // Or we could widen the constants and truncate the load.
  unsigned BitWidth = IT->getBitWidth();
  return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
         DL.fitsInLegalInteger(IT->getBitWidth());
}

static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  if (CaseRange >= UINT64_MAX / 100)
    return false; // Avoid multiplication overflows below.

  return NumCases * 100 >= CaseRange * MinDensity;
}

static bool isSwitchDense(ArrayRef<int64_t> Values) {
  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
  uint64_t Range = Diff + 1;
  if (Range < Diff)
    return false; // Overflow.

  return isSwitchDense(Values.size(), Range);
}

/// Determine whether a lookup table should be built for this switch, based on
/// the number of cases, size of the table, and the types of the results.
// TODO: We could support larger than legal types by limiting based on the
// number of loads required and/or table size. If the constants are small we
// could use smaller table entries and extend after the load.
static bool
ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
                       const TargetTransformInfo &TTI, const DataLayout &DL,
                       const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
  if (SI->getNumCases() > TableSize)
    return false; // TableSize overflowed.

  bool AllTablesFitInRegister = true;
  bool HasIllegalType = false;
  for (const auto &I : ResultTypes) {
    Type *Ty = I.second;

    // Saturate this flag to true.
    HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);

    // Saturate this flag to false.
    AllTablesFitInRegister =
        AllTablesFitInRegister &&
        SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);

    // If both flags saturate, we're done. NOTE: This *only* works with
    // saturating flags, and all flags have to saturate first due to the
    // non-deterministic behavior of iterating over a dense map.
    if (HasIllegalType && !AllTablesFitInRegister)
      break;
  }

  // If each table would fit in a register, we should build it anyway.
  if (AllTablesFitInRegister)
    return true;

  // Don't build a table that doesn't fit in-register if it has illegal types.
  if (HasIllegalType)
    return false;

  return isSwitchDense(SI->getNumCases(), TableSize);
}
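
/// Decide whether the switch condition can index the lookup table directly,
/// without first subtracting the minimum case value. This is trivially true
/// when the smallest case is zero; otherwise it requires non-negative case
/// values, a reachable default result to fill the leading holes, and result
/// tables that still fit in a register at the larger size.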
static bool ShouldUseSwitchConditionAsTableIndex(
    ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
    bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
    const DataLayout &DL, const TargetTransformInfo &TTI) {
  if (MinCaseVal.isNullValue())
    return true;
  if (MinCaseVal.isNegative() ||
      MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
      !HasDefaultResults)
    return false;
  return all_of(ResultTypes, [&](const auto &KV) {
    return SwitchLookupTable::WouldFitInRegister(
        DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
        KV.second /* ResultType */);
  });
}

/// Try to reuse the switch table index compare. Following pattern:
/// \code
/// if (idx < tablesize)
///    r = table[idx]; // table does not contain default_value
/// else
///    r = default_value;
/// if (r != default_value)
///   ...
/// \endcode
/// Is optimized to:
/// \code
/// cond = idx < tablesize;
/// if (cond)
///    r = table[idx];
/// else
///    r = default_value;
/// if (cond)
///   ...
/// \endcode
/// Jump threading will then eliminate the second if(cond).
static void reuseTableCompare(
    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
                                                 DefaultValue, CmpOp1, true);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result.
  for (auto ValuePair : Values) {
    Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
                                                ValuePair.second, CmpOp1, true);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do
  // it at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
        RangeCheckBranch);
    CmpInst->replaceAllUsesWith(InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}

/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
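///
/// For example (illustrative C), a switch of the form
/// \code
/// switch (x) {
/// case 0: r = 10; break;
/// case 1: r = 20; break;
/// case 2: r = 30; break;
/// default: r = 0; break;
/// }
/// \endcode
/// becomes a load from a constant array {10, 20, 30}, guarded by a range
/// check on x when the default is reachable.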
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
                                DomTreeUpdater *DTU, const DataLayout &DL,
                                const TargetTransformInfo &TTI) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();
  // Only build a lookup table when we have a target that supports it and the
  // "no-jump-tables" function attribute is not set.
  if (!TTI.shouldBuildLookupTables() ||
      (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
    return false;

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with fewer than three cases; lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;
  // Figure out the corresponding result for each case value and phi node in
  // the common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
  SmallDenseMap<PHINode *, ResultListTy> ResultLists;

  SmallDenseMap<PHINode *, Constant *> DefaultResults;
  SmallDenseMap<PHINode *, Type *> ResultTypes;
  SmallVector<PHINode *, 4> PHIs;

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
                        Results, DL, TTI))
      return false;

    // Append the result from this case to the list for each phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      if (!ResultLists.count(PHI))
        PHIs.push_back(PHI);
      ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
    }
  }

  // Keep track of the result types.
  for (PHINode *PHI : PHIs) {
    ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
  }
  uint64_t NumResults = ResultLists[PHIs[0]].size();

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
                     DefaultResultsList, DL, TTI);
  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  if (UseSwitchConditionAsTableIndex)
    TableSize = MaxCaseVal->getLimitedValue() + 1;
  else
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

  bool TableHasHoles = (NumResults < TableSize);
  bool NeedMask = (TableHasHoles && !HasDefaultResults);
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    if (!DL.fitsInLegalInteger(TableSize))
      return false;
  }

  if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  std::vector<DominatorTree::UpdateType> Updates;

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);

  // Compute the table index value.
  Builder.SetInsertPoint(SI);
  Value *TableIndex;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0);
    TableIndex = SI->getCondition();
  } else {
    TableIndexOffset = MinCaseVal;
    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
                                   "switch.tableidx");
  }
  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup
  // table BB. Otherwise, check that the condition is within the case range.
  const bool DefaultIsReachable =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());

  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  BranchInst *RangeCheckBranch = nullptr;

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(LookupBB);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
    // Note: We call removePredecessor later since we need to be able to get
    // the PHI value for the default case in case we're using a bit mask.
  } else {
    Value *Cmp = Builder.CreateICmpULT(
        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
  }
  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                  CommonDest->getParent(), CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
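    // For example, a 5-entry table with results only at offsets 0, 2 and 4
    // yields the i8 mask 0b00010101 (the width is rounded up to a power of
    // two of at least 8).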
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
      uint64_t Idx = (ResultList[I].first->getValue() -
                      TableIndexOffset->getValue()).getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
    Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
  }
  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];

    // If using a bitmask, use any value to fill the lookup table holes.
    Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
                            DL, FuncName);

    Value *Result = Table.BuildLookup(TableIndex, Builder);

    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
      }
    }

    PHI->addIncoming(Result, LookupBB);
  }

  Builder.CreateBr(CommonDest);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(i);

    if (Succ == SI->getDefaultDest())
      continue;
    Succ->removePredecessor(BB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, BB, Succ});
  }
  SI->eraseFromParent();

  if (DTU)
    DTU->applyUpdates(Updates);

  ++NumLookupTables;
  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}

/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
///
/// This converts a sparse switch into a dense switch which allows better
/// lowering and could also allow transforming into a lookup table.
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;

  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values.
  // We can treat the case values as signed or unsigned. We can optimize more
  // common cases such as a sequence crossing zero {-4,0,4,8} if we interpret
  // case values as signed.
  SmallVector<int64_t,4> Values;
  for (const auto &C : SI->cases())
    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // FIXME: It's possible that optimizing a switch on powers of two might also
  // be beneficial - flag values are often powers of two and we could use a CLZ
  // as the key function.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more
  // than one element and LLVM disallows duplicate cases, Shift is guaranteed
  // to be less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  //   C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide
  // cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.
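  //
  // For example, for cases {2, 6, 10, 14} we have Base = 2 and Shift = 2: an
  // in-range condition such as 10 becomes ROTR(8, 2) = 2, while an off-stride
  // value such as 11 becomes ROTR(9, 2), whose shifted-off low bits land in
  // the top of the word and send it to the default case.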
  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  auto *ShiftC = ConstantInt::get(Ty, Shift);
  auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
  auto *LShr = Builder.CreateLShr(Sub, ShiftC);
  auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
  auto *Rot = Builder.CreateOr(LShr, Shl);
  SI->replaceUsesOfWith(SI->getCondition(), Rot);

  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
    Case.setValue(
        cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
  }
  return true;
}

bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
        return requestResimplify();

    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
      if (SimplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->instructionsWithoutDebug(false).begin())
      if (FoldValueComparisonIntoPredecessors(SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
  // The conversion from switch to comparison may lose information on
  // impossible switch values, so disable it early in the pipeline.
  if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
    return requestResimplify();

  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion from switch to lookup tables results in
  // difficult-to-analyze code and makes pruning branches much harder. This is
  // a problem if the switch expression itself can still be restricted as a
  // result of inlining or CVP. Therefore, only apply this transformation
  // during late stages of the optimisation pipeline.
  if (Options.ConvertSwitchToLookupTable &&
      SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (ReduceSwitchRange(SI, Builder, DL, TTI))
    return requestResimplify();

  return false;
}
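
/// Simplify an indirectbr terminator: drop duplicate and non-address-taken
/// destinations, and lower the degenerate zero- and one-destination forms to
/// an unreachable and an unconditional branch, respectively.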
bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
  BasicBlock *BB = IBI->getParent();
  bool Changed = false;

  // Eliminate redundant destinations.
  SmallPtrSet<Value *, 8> Succs;
  SmallSetVector<BasicBlock *, 8> RemovedSuccs;
  for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
    BasicBlock *Dest = IBI->getDestination(i);
    if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
      if (!Dest->hasAddressTaken())
        RemovedSuccs.insert(Dest);
      Dest->removePredecessor(BB);
      IBI->removeDestination(i);
      --i;
      --e;
      Changed = true;
    }
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }

  if (IBI->getNumDestinations() == 0) {
    // If the indirectbr has no successors, change it to unreachable.
    new UnreachableInst(IBI->getContext(), IBI);
    EraseTerminatorAndDCECond(IBI);
    return true;
  }

  if (IBI->getNumDestinations() == 1) {
    // If the indirectbr has one successor, change it to a direct branch.
    BranchInst::Create(IBI->getDestination(0), IBI);
    EraseTerminatorAndDCECond(IBI);
    return true;
  }

  if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
    if (SimplifyIndirectBrOnSelect(IBI, SI))
      return requestResimplify();
  }
  return Changed;
}

/// Given a block containing only a landing pad and an unconditional branch,
/// try to find another basic block which this one can be merged with. This
/// handles cases where we have multiple invokes with unique landing pads, but
/// a shared handler.
///
/// We specifically choose to not worry about merging non-empty blocks
/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
/// practice, the optimizer produces empty landing pad blocks quite frequently
/// when dealing with exception-dense code. (see: instcombine, gvn, if-else
/// sinking in this file)
///
/// This is primarily a code size optimization. We need to avoid performing
/// any transform which might inhibit optimization (such as our ability to
/// specialize a particular handler via tail commoning). We do this by not
/// merging any blocks which require us to introduce a phi. Since the same
/// values are flowing through both blocks, we don't lose any ability to
/// specialize. If anything, we make such specialization more likely.
///
/// TODO - This transformation could remove entries from a phi in the target
/// block when the inputs in the phi are the same for the two blocks being
/// merged. In some cases, this could result in removal of the PHI entirely.
static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
                                 BasicBlock *BB, DomTreeUpdater *DTU) {
  auto Succ = BB->getUniqueSuccessor();
  assert(Succ);
  // If there's a phi in the successor block, we'd likely have to introduce
  // a phi into the merged landing pad block.
  if (isa<PHINode>(*Succ->begin()))
    return false;

  for (BasicBlock *OtherPred : predecessors(Succ)) {
    if (BB == OtherPred)
      continue;
    BasicBlock::iterator I = OtherPred->begin();
    LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
    if (!LPad2 || !LPad2->isIdenticalTo(LPad))
      continue;
    for (++I; isa<DbgInfoIntrinsic>(I); ++I)
      ;
    BranchInst *BI2 = dyn_cast<BranchInst>(I);
    if (!BI2 || !BI2->isIdenticalTo(BI))
      continue;

    std::vector<DominatorTree::UpdateType> Updates;

    // We've found an identical block. Update our predecessors to take that
    // path instead and make ourselves dead.
    SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
    for (BasicBlock *Pred : UniquePreds) {
      InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
             "unexpected successor");
      II->setUnwindDest(OtherPred);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
        Updates.push_back({DominatorTree::Delete, Pred, BB});
      }
    }

    // The debug info in OtherPred doesn't cover the merged control flow that
    // used to go through BB. We need to delete it or update it.
    for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
      if (isa<DbgInfoIntrinsic>(Inst))
        Inst.eraseFromParent();

    SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
    for (BasicBlock *Succ : UniqueSuccs) {
      Succ->removePredecessor(BB);
      if (DTU)
        Updates.push_back({DominatorTree::Delete, BB, Succ});
    }

    IRBuilder<> Builder(BI);
    Builder.CreateUnreachable();
    BI->eraseFromParent();
    if (DTU)
      DTU->applyUpdates(Updates);
    return true;
  }
  return false;
}

bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
  return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
                                   : simplifyCondBranch(Branch, Builder);
}

bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
                                          IRBuilder<> &Builder) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *Succ = BI->getSuccessor(0);

  // If the Terminator is the only non-phi instruction, simplify the block.
  // If LoopHeader is provided, check if the block or its successor is a loop
  // header. (This is for early invocations before loop simplify and
  // vectorization to keep canonical loop forms for nested loops. These blocks
  // can be eliminated when the pass is invoked later in the back-end.)
  // Note that if BB has only one predecessor then we do not introduce a new
  // backedge, so we can eliminate BB.
  bool NeedCanonicalLoop =
      Options.NeedCanonicalLoop &&
      (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
       (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(true)->getIterator();
  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
    return true;

  // If the only instruction in the block is a seteq/setne comparison against a
  // constant, try to simplify the block.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
    if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
      for (++I; isa<DbgInfoIntrinsic>(I); ++I)
        ;
      if (I->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
        return true;
    }

  // See if we can merge an empty landing pad block with another which is
  // equivalent.
  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
    for (++I; isa<DbgInfoIntrinsic>(I); ++I)
      ;
    if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
      return true;
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and our successor, fold the comparison into the
  // predecessor and use logical operations to update the incoming value
  // for PHI nodes in common successor.
  if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
                             Options.BonusInstThreshold))
    return requestResimplify();
  return false;
}
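
/// If every predecessor of BB has a single predecessor and it is the same
/// block for all of them, return that common predecessor; otherwise return
/// null. This identifies the head of a diamond ending at BB.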
static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
  BasicBlock *PredPred = nullptr;
  for (auto *P : predecessors(BB)) {
    BasicBlock *PPred = P->getSinglePredecessor();
    if (!PPred || (PredPred && PredPred != PPred))
      return nullptr;
    PredPred = PPred;
  }
  return PredPred;
}

bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
  assert(
      !isa<ConstantInt>(BI->getCondition()) &&
      BI->getSuccessor(0) != BI->getSuccessor(1) &&
      "Tautological conditional branch should have been eliminated already.");

  BasicBlock *BB = BI->getParent();
  if (!Options.SimplifyCondBranch)
    return false;

  // Conditional branch
  if (isValueEqualityComparison(BI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this
    // switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
        return requestResimplify();

    // This block must be empty, except for the setcond inst, if it exists.
    // Ignore dbg and pseudo intrinsics.
    auto I = BB->instructionsWithoutDebug(true).begin();
    if (&*I == BI) {
      if (FoldValueComparisonIntoPredecessors(BI, Builder))
        return requestResimplify();
    } else if (&*I == cast<Instruction>(BI->getCondition())) {
      ++I;
      if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
        return requestResimplify();
    }
  }

  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
  if (SimplifyBranchOnICmpChain(BI, Builder, DL))
    return true;

  // If this basic block has dominating predecessor blocks and the dominating
  // blocks' conditions imply BI's condition, we know the direction of BI.
  std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
  if (Imp) {
    // Turn this into a branch on constant.
    auto *OldCond = BI->getCondition();
    ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
                             : ConstantInt::getFalse(BB->getContext());
    BI->setCondition(TorF);
    RecursivelyDeleteTriviallyDeadInstructions(OldCond);
    return requestResimplify();
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and one of our successors, fold the comparison into the
  // predecessor and use logical operations to pick the right destination.
  if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
                             Options.BonusInstThreshold))
    return requestResimplify();

  // We have a conditional branch to two blocks that are only reachable
  // from BI. We know that the condbr dominates the two blocks, so see if
  // there is any identical code in the "then" and "else" blocks. If so, we
  // can hoist it up to the branching block.
  if (BI->getSuccessor(0)->getSinglePredecessor()) {
    if (BI->getSuccessor(1)->getSinglePredecessor()) {
      if (HoistCommon &&
          HoistThenElseCodeToIf(BI, TTI, !Options.HoistCommonInsts))
        return requestResimplify();
    } else {
      // If Successor #1 has multiple preds, we may be able to conditionally
      // execute Successor #0 if it branches to Successor #1.
      Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
      if (Succ0TI->getNumSuccessors() == 1 &&
          Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
        if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
          return requestResimplify();
    }
  } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
    // If Successor #0 has multiple preds, we may be able to conditionally
    // execute Successor #1 if it branches to Successor #0.
    Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
    if (Succ1TI->getNumSuccessors() == 1 &&
        Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
      if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
        return requestResimplify();
  }

  // If this is a branch on something for which we know the constant value in
  // predecessors (e.g. a phi node in the current block), thread control
  // through this block.
  if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, Options.AC))
    return requestResimplify();

  // Scan predecessor blocks for conditional branches.
  for (BasicBlock *Pred : predecessors(BB))
    if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
      if (PBI != BI && PBI->isConditional())
        if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
          return requestResimplify();

  // Look for diamond patterns.
  if (MergeCondStores)
    if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
      if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
        if (PBI != BI && PBI->isConditional())
          if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
            return requestResimplify();

  return false;
}

/// Check if passing a value to an instruction will cause undefined behavior.
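/// For example, a null pointer reaching a non-volatile load or store, or the
/// callee operand of a call, makes the instruction undefined (modulo address
/// spaces where null is defined); undef reaching a noundef call argument is
/// likewise undefined.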
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I,
                                          bool PtrValueMayBeModified = false) {
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return false;

  if (I->use_empty())
    return false;

  if (C->isNullValue() || isa<UndefValue>(C)) {
    // Only look at the first use, avoid hurting compile time with long
    // uselists.
    auto *Use = cast<Instruction>(*I->user_begin());
    // Bail out if Use is not in the same BB as I or Use == I or Use comes
    // before I in the block. The latter two can be the case if Use is a PHI
    // node.
    if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
      return false;

    // Now make sure that there are no instructions in between that can alter
    // control flow (eg. calls).
    auto InstrRange =
        make_range(std::next(I->getIterator()), Use->getIterator());
    if (any_of(InstrRange, [](Instruction &I) {
          return !isGuaranteedToTransferExecutionToSuccessor(&I);
        }))
      return false;

    // Look through GEPs. A load from a GEP derived from NULL is still
    // undefined.
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
      if (GEP->getPointerOperand() == I) {
        if (!GEP->isInBounds() || !GEP->hasAllZeroIndices())
          PtrValueMayBeModified = true;
        return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
      }

    // Look through bitcasts.
    if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
      return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);

    // Load from null is undefined.
    if (LoadInst *LI = dyn_cast<LoadInst>(Use))
      if (!LI->isVolatile())
        return !NullPointerIsDefined(LI->getFunction(),
                                     LI->getPointerAddressSpace());

    // Store to null is undefined.
    if (StoreInst *SI = dyn_cast<StoreInst>(Use))
      if (!SI->isVolatile())
        return (!NullPointerIsDefined(SI->getFunction(),
                                      SI->getPointerAddressSpace())) &&
               SI->getPointerOperand() == I;

    if (auto *CB = dyn_cast<CallBase>(Use)) {
      if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
        return false;
      // A call to null is undefined.
      if (CB->getCalledOperand() == I)
        return true;

      if (C->isNullValue()) {
        for (const llvm::Use &Arg : CB->args())
          if (Arg == I) {
            unsigned ArgIdx = CB->getArgOperandNo(&Arg);
            if (CB->isPassingUndefUB(ArgIdx) &&
                CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
              // Passing null to a nonnull+noundef argument is undefined.
              return !PtrValueMayBeModified;
            }
          }
      } else if (isa<UndefValue>(C)) {
        // Passing undef to a noundef argument is undefined.
        for (const llvm::Use &Arg : CB->args())
          if (Arg == I) {
            unsigned ArgIdx = CB->getArgOperandNo(&Arg);
            if (CB->isPassingUndefUB(ArgIdx)) {
              // Passing undef to a noundef argument is undefined.
              return true;
            }
          }
      }
    }
  }
  return false;
}

/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
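/// For example (illustrative IR), given
/// \code
///   %p = phi ptr [ null, %bb1 ], [ %q, %bb2 ]
///   %v = load i32, ptr %p
/// \endcode
/// the edge from %bb1 would make the load undefined, so %bb1's branch into
/// this block is removed (or, for a switch, redirected to a new unreachable
/// block).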
static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
                                              DomTreeUpdater *DTU) {
  for (PHINode &PHI : BB->phis())
    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
      if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
        BasicBlock *Predecessor = PHI.getIncomingBlock(i);
        Instruction *T = Predecessor->getTerminator();
        IRBuilder<> Builder(T);
        if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
          BB->removePredecessor(Predecessor);
          // Turn unconditional branches into unreachables and remove the dead
          // destination from conditional branches.
          if (BI->isUnconditional())
            Builder.CreateUnreachable();
          else {
            // Preserve guarding condition in assume, because it might not be
            // inferrable from any dominating condition.
            Value *Cond = BI->getCondition();
            if (BI->getSuccessor(0) == BB)
              Builder.CreateAssumption(Builder.CreateNot(Cond));
            else
              Builder.CreateAssumption(Cond);
            Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
                                                       : BI->getSuccessor(0));
          }
          BI->eraseFromParent();
          if (DTU)
            DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
          return true;
        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
          // Redirect all branches leading to UB into
          // a newly created unreachable block.
          BasicBlock *Unreachable = BasicBlock::Create(
              Predecessor->getContext(), "unreachable", BB->getParent(), BB);
          Builder.SetInsertPoint(Unreachable);
          // The new block contains only one instruction: Unreachable.
          Builder.CreateUnreachable();
          for (const auto &Case : SI->cases())
            if (Case.getCaseSuccessor() == BB) {
              BB->removePredecessor(Predecessor);
              Case.setSuccessor(Unreachable);
            }
          if (SI->getDefaultDest() == BB) {
            BB->removePredecessor(Predecessor);
            SI->setDefaultDest(Unreachable);
          }
          if (DTU)
            DTU->applyUpdates(
                {{DominatorTree::Insert, Predecessor, Unreachable},
                 {DominatorTree::Delete, Predecessor, BB}});
          return true;
        }
      }
  return false;
}

bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
  bool Changed = false;

  assert(BB && BB->getParent() && "Block not embedded in function!");
  assert(BB->getTerminator() && "Degenerate basic block encountered!");

  // Remove basic blocks that have no predecessors (except the entry block)...
  // or that just have themselves as a predecessor. These are unreachable.
  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
      BB->getSinglePredecessor() == BB) {
    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
    DeleteDeadBlock(BB, DTU);
    return true;
  }
  // Check to see if we can constant propagate this terminator instruction
  // away...
  Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
                                    /*TLI=*/nullptr, DTU);

  // Check for and eliminate duplicate PHI nodes in this block.
  Changed |= EliminateDuplicatePHINodes(BB);

  // Check for and remove branches that will always cause undefined behavior.
  if (removeUndefIntroducingPredecessor(BB, DTU))
    return requestResimplify();

  // Merge basic blocks into their predecessor if there is only one distinct
  // pred, and if there is only one distinct successor of the predecessor, and
  // if there are no PHI nodes.
  if (MergeBlockIntoPredecessor(BB, DTU))
    return true;

  if (SinkCommon && Options.SinkCommonInsts)
    if (SinkCommonCodeFromPredecessors(BB, DTU) ||
        MergeCompatibleInvokes(BB, DTU)) {
      // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
      // so we may now have duplicate PHI's.
      // Let's rerun EliminateDuplicatePHINodes() first,
      // before FoldTwoEntryPHINode() potentially converts them into select's,
      // after which we'd need a whole EarlyCSE pass run to clean them up.
      return true;
    }

  IRBuilder<> Builder(BB);

  if (Options.FoldTwoEntryPHINode) {
    // If there is a trivial two-entry PHI node in this basic block, and we can
    // eliminate it, do so now.
    if (auto *PN = dyn_cast<PHINode>(BB->begin()))
      if (PN->getNumIncomingValues() == 2)
        if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
          return true;
  }

  Instruction *Terminator = BB->getTerminator();
  Builder.SetInsertPoint(Terminator);
  switch (Terminator->getOpcode()) {
  case Instruction::Br:
    Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
    break;
  case Instruction::Resume:
    Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
    break;
  case Instruction::CleanupRet:
    Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
    break;
  case Instruction::Switch:
    Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
    break;
  case Instruction::Unreachable:
    Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
    break;
  case Instruction::IndirectBr:
    Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
    break;
  }

  return Changed;
}

bool SimplifyCFGOpt::run(BasicBlock *BB) {
  bool Changed = false;

  // Repeatedly simplify BB as long as resimplification is requested.
  do {
    Resimplify = false;

    // Perform one round of simplification. The Resimplify flag will be set if
    // another iteration is requested.
    Changed |= simplifyOnce(BB);
  } while (Resimplify);

  return Changed;
}

bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
                       DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
                       ArrayRef<WeakVH> LoopHeaders) {
  return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
                        Options)
      .run(BB);
}