ScalarEvolution.cpp (553 KB)

43091431014311143121431314314143151431614317143181431914320143211432214323143241432514326
  1. //===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains the implementation of the scalar evolution analysis
  10. // engine, which is used primarily to analyze expressions involving induction
  11. // variables in loops.
  12. //
  13. // There are several aspects to this library. First is the representation of
  14. // scalar expressions, which are represented as subclasses of the SCEV class.
  15. // These classes are used to represent certain types of subexpressions that we
  16. // can handle. We only create one SCEV of a particular shape, so
  17. // pointer-comparisons for equality are legal.
  18. //
  19. // One important aspect of the SCEV objects is that they are never cyclic, even
  20. // if there is a cycle in the dataflow for an expression (ie, a PHI node). If
  21. // the PHI node is one of the idioms that we can represent (e.g., a polynomial
  22. // recurrence) then we represent it directly as a recurrence node, otherwise we
  23. // represent it as a SCEVUnknown node.
  24. //
  25. // In addition to being able to represent expressions of various types, we also
  26. // have folders that are used to build the *canonical* representation for a
  27. // particular expression. These folders are capable of using a variety of
  28. // rewrite rules to simplify the expressions.
  29. //
  30. // Once the folders are defined, we can implement the more interesting
  31. // higher-level code, such as the code that recognizes PHI nodes of various
  32. // types, computes the execution count of a loop, etc.
  33. //
  34. // TODO: We should use these routines and value representations to implement
  35. // dependence analysis!
  36. //
  37. //===----------------------------------------------------------------------===//
  38. //
  39. // There are several good references for the techniques used in this analysis.
  40. //
  41. // Chains of recurrences -- a method to expedite the evaluation
  42. // of closed-form functions
  43. // Olaf Bachmann, Paul S. Wang, Eugene V. Zima
  44. //
  45. // On computational properties of chains of recurrences
  46. // Eugene V. Zima
  47. //
  48. // Symbolic Evaluation of Chains of Recurrences for Loop Optimization
  49. // Robert A. van Engelen
  50. //
  51. // Efficient Symbolic Analysis for Optimizing Compilers
  52. // Robert A. van Engelen
  53. //
  54. // Using the chains of recurrences algebra for data dependence testing and
  55. // induction variable substitution
  56. // MS Thesis, Johnie Birch
  57. //
  58. //===----------------------------------------------------------------------===//
  59. #include "llvm/Analysis/ScalarEvolution.h"
  60. #include "llvm/ADT/APInt.h"
  61. #include "llvm/ADT/ArrayRef.h"
  62. #include "llvm/ADT/DenseMap.h"
  63. #include "llvm/ADT/DepthFirstIterator.h"
  64. #include "llvm/ADT/EquivalenceClasses.h"
  65. #include "llvm/ADT/FoldingSet.h"
  66. #include "llvm/ADT/None.h"
  67. #include "llvm/ADT/Optional.h"
  68. #include "llvm/ADT/STLExtras.h"
  69. #include "llvm/ADT/ScopeExit.h"
  70. #include "llvm/ADT/Sequence.h"
  71. #include "llvm/ADT/SetVector.h"
  72. #include "llvm/ADT/SmallPtrSet.h"
  73. #include "llvm/ADT/SmallSet.h"
  74. #include "llvm/ADT/SmallVector.h"
  75. #include "llvm/ADT/Statistic.h"
  76. #include "llvm/ADT/StringRef.h"
  77. #include "llvm/Analysis/AssumptionCache.h"
  78. #include "llvm/Analysis/ConstantFolding.h"
  79. #include "llvm/Analysis/InstructionSimplify.h"
  80. #include "llvm/Analysis/LoopInfo.h"
  81. #include "llvm/Analysis/ScalarEvolutionDivision.h"
  82. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  83. #include "llvm/Analysis/TargetLibraryInfo.h"
  84. #include "llvm/Analysis/ValueTracking.h"
  85. #include "llvm/Config/llvm-config.h"
  86. #include "llvm/IR/Argument.h"
  87. #include "llvm/IR/BasicBlock.h"
  88. #include "llvm/IR/CFG.h"
  89. #include "llvm/IR/Constant.h"
  90. #include "llvm/IR/ConstantRange.h"
  91. #include "llvm/IR/Constants.h"
  92. #include "llvm/IR/DataLayout.h"
  93. #include "llvm/IR/DerivedTypes.h"
  94. #include "llvm/IR/Dominators.h"
  95. #include "llvm/IR/Function.h"
  96. #include "llvm/IR/GlobalAlias.h"
  97. #include "llvm/IR/GlobalValue.h"
  98. #include "llvm/IR/GlobalVariable.h"
  99. #include "llvm/IR/InstIterator.h"
  100. #include "llvm/IR/InstrTypes.h"
  101. #include "llvm/IR/Instruction.h"
  102. #include "llvm/IR/Instructions.h"
  103. #include "llvm/IR/IntrinsicInst.h"
  104. #include "llvm/IR/Intrinsics.h"
  105. #include "llvm/IR/LLVMContext.h"
  106. #include "llvm/IR/Metadata.h"
  107. #include "llvm/IR/Operator.h"
  108. #include "llvm/IR/PatternMatch.h"
  109. #include "llvm/IR/Type.h"
  110. #include "llvm/IR/Use.h"
  111. #include "llvm/IR/User.h"
  112. #include "llvm/IR/Value.h"
  113. #include "llvm/IR/Verifier.h"
  114. #include "llvm/InitializePasses.h"
  115. #include "llvm/Pass.h"
  116. #include "llvm/Support/Casting.h"
  117. #include "llvm/Support/CommandLine.h"
  118. #include "llvm/Support/Compiler.h"
  119. #include "llvm/Support/Debug.h"
  120. #include "llvm/Support/ErrorHandling.h"
  121. #include "llvm/Support/KnownBits.h"
  122. #include "llvm/Support/SaveAndRestore.h"
  123. #include "llvm/Support/raw_ostream.h"
  124. #include <algorithm>
  125. #include <cassert>
  126. #include <climits>
  127. #include <cstddef>
  128. #include <cstdint>
  129. #include <cstdlib>
  130. #include <map>
  131. #include <memory>
  132. #include <tuple>
  133. #include <utility>
  134. #include <vector>
  135. using namespace llvm;
  136. using namespace PatternMatch;
  137. #define DEBUG_TYPE "scalar-evolution"
  138. STATISTIC(NumTripCountsComputed,
  139. "Number of loops with predictable loop counts");
  140. STATISTIC(NumTripCountsNotComputed,
  141. "Number of loops without predictable loop counts");
  142. STATISTIC(NumBruteForceTripCountsComputed,
  143. "Number of loops with trip counts computed by force");
  144. static cl::opt<unsigned>
  145. MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
  146. cl::ZeroOrMore,
  147. cl::desc("Maximum number of iterations SCEV will "
  148. "symbolically execute a constant "
  149. "derived loop"),
  150. cl::init(100));
  151. // FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean.
  152. static cl::opt<bool> VerifySCEV(
  153. "verify-scev", cl::Hidden,
  154. cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
  155. static cl::opt<bool> VerifySCEVStrict(
  156. "verify-scev-strict", cl::Hidden,
  157. cl::desc("Enable stricter verification with -verify-scev is passed"));
  158. static cl::opt<bool>
  159. VerifySCEVMap("verify-scev-maps", cl::Hidden,
  160. cl::desc("Verify no dangling value in ScalarEvolution's "
  161. "ExprValueMap (slow)"));
  162. static cl::opt<bool> VerifyIR(
  163. "scev-verify-ir", cl::Hidden,
  164. cl::desc("Verify IR correctness when making sensitive SCEV queries (slow)"),
  165. cl::init(false));
  166. static cl::opt<unsigned> MulOpsInlineThreshold(
  167. "scev-mulops-inline-threshold", cl::Hidden,
  168. cl::desc("Threshold for inlining multiplication operands into a SCEV"),
  169. cl::init(32));
  170. static cl::opt<unsigned> AddOpsInlineThreshold(
  171. "scev-addops-inline-threshold", cl::Hidden,
  172. cl::desc("Threshold for inlining addition operands into a SCEV"),
  173. cl::init(500));
  174. static cl::opt<unsigned> MaxSCEVCompareDepth(
  175. "scalar-evolution-max-scev-compare-depth", cl::Hidden,
  176. cl::desc("Maximum depth of recursive SCEV complexity comparisons"),
  177. cl::init(32));
  178. static cl::opt<unsigned> MaxSCEVOperationsImplicationDepth(
  179. "scalar-evolution-max-scev-operations-implication-depth", cl::Hidden,
  180. cl::desc("Maximum depth of recursive SCEV operations implication analysis"),
  181. cl::init(2));
  182. static cl::opt<unsigned> MaxValueCompareDepth(
  183. "scalar-evolution-max-value-compare-depth", cl::Hidden,
  184. cl::desc("Maximum depth of recursive value complexity comparisons"),
  185. cl::init(2));
  186. static cl::opt<unsigned>
  187. MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden,
  188. cl::desc("Maximum depth of recursive arithmetics"),
  189. cl::init(32));
  190. static cl::opt<unsigned> MaxConstantEvolvingDepth(
  191. "scalar-evolution-max-constant-evolving-depth", cl::Hidden,
  192. cl::desc("Maximum depth of recursive constant evolving"), cl::init(32));
  193. static cl::opt<unsigned>
  194. MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden,
  195. cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"),
  196. cl::init(8));
  197. static cl::opt<unsigned>
  198. MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
  199. cl::desc("Max coefficients in AddRec during evolving"),
  200. cl::init(8));
  201. static cl::opt<unsigned>
  202. HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden,
  203. cl::desc("Size of the expression which is considered huge"),
  204. cl::init(4096));
  205. static cl::opt<bool>
  206. ClassifyExpressions("scalar-evolution-classify-expressions",
  207. cl::Hidden, cl::init(true),
  208. cl::desc("When printing analysis, include information on every instruction"));
  209. static cl::opt<bool> UseExpensiveRangeSharpening(
  210. "scalar-evolution-use-expensive-range-sharpening", cl::Hidden,
  211. cl::init(false),
  212. cl::desc("Use more powerful methods of sharpening expression ranges. May "
  213. "be costly in terms of compile time"));
  214. //===----------------------------------------------------------------------===//
  215. // SCEV class definitions
  216. //===----------------------------------------------------------------------===//
  217. //===----------------------------------------------------------------------===//
  218. // Implementation of the SCEV class.
  219. //
  220. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  221. LLVM_DUMP_METHOD void SCEV::dump() const {
  222. print(dbgs());
  223. dbgs() << '\n';
  224. }
  225. #endif
  226. void SCEV::print(raw_ostream &OS) const {
  227. switch (getSCEVType()) {
  228. case scConstant:
  229. cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
  230. return;
  231. case scPtrToInt: {
  232. const SCEVPtrToIntExpr *PtrToInt = cast<SCEVPtrToIntExpr>(this);
  233. const SCEV *Op = PtrToInt->getOperand();
  234. OS << "(ptrtoint " << *Op->getType() << " " << *Op << " to "
  235. << *PtrToInt->getType() << ")";
  236. return;
  237. }
  238. case scTruncate: {
  239. const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
  240. const SCEV *Op = Trunc->getOperand();
  241. OS << "(trunc " << *Op->getType() << " " << *Op << " to "
  242. << *Trunc->getType() << ")";
  243. return;
  244. }
  245. case scZeroExtend: {
  246. const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
  247. const SCEV *Op = ZExt->getOperand();
  248. OS << "(zext " << *Op->getType() << " " << *Op << " to "
  249. << *ZExt->getType() << ")";
  250. return;
  251. }
  252. case scSignExtend: {
  253. const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
  254. const SCEV *Op = SExt->getOperand();
  255. OS << "(sext " << *Op->getType() << " " << *Op << " to "
  256. << *SExt->getType() << ")";
  257. return;
  258. }
  259. case scAddRecExpr: {
  260. const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
  261. OS << "{" << *AR->getOperand(0);
  262. for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
  263. OS << ",+," << *AR->getOperand(i);
  264. OS << "}<";
  265. if (AR->hasNoUnsignedWrap())
  266. OS << "nuw><";
  267. if (AR->hasNoSignedWrap())
  268. OS << "nsw><";
  269. if (AR->hasNoSelfWrap() &&
  270. !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
  271. OS << "nw><";
  272. AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  273. OS << ">";
  274. return;
  275. }
  276. case scAddExpr:
  277. case scMulExpr:
  278. case scUMaxExpr:
  279. case scSMaxExpr:
  280. case scUMinExpr:
  281. case scSMinExpr:
  282. case scSequentialUMinExpr: {
  283. const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
  284. const char *OpStr = nullptr;
  285. switch (NAry->getSCEVType()) {
  286. case scAddExpr: OpStr = " + "; break;
  287. case scMulExpr: OpStr = " * "; break;
  288. case scUMaxExpr: OpStr = " umax "; break;
  289. case scSMaxExpr: OpStr = " smax "; break;
  290. case scUMinExpr:
  291. OpStr = " umin ";
  292. break;
  293. case scSMinExpr:
  294. OpStr = " smin ";
  295. break;
  296. case scSequentialUMinExpr:
  297. OpStr = " umin_seq ";
  298. break;
  299. default:
  300. llvm_unreachable("There are no other nary expression types.");
  301. }
  302. OS << "(";
  303. ListSeparator LS(OpStr);
  304. for (const SCEV *Op : NAry->operands())
  305. OS << LS << *Op;
  306. OS << ")";
  307. switch (NAry->getSCEVType()) {
  308. case scAddExpr:
  309. case scMulExpr:
  310. if (NAry->hasNoUnsignedWrap())
  311. OS << "<nuw>";
  312. if (NAry->hasNoSignedWrap())
  313. OS << "<nsw>";
  314. break;
  315. default:
  316. // Nothing to print for other nary expressions.
  317. break;
  318. }
  319. return;
  320. }
  321. case scUDivExpr: {
  322. const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
  323. OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
  324. return;
  325. }
  326. case scUnknown: {
  327. const SCEVUnknown *U = cast<SCEVUnknown>(this);
  328. Type *AllocTy;
  329. if (U->isSizeOf(AllocTy)) {
  330. OS << "sizeof(" << *AllocTy << ")";
  331. return;
  332. }
  333. if (U->isAlignOf(AllocTy)) {
  334. OS << "alignof(" << *AllocTy << ")";
  335. return;
  336. }
  337. Type *CTy;
  338. Constant *FieldNo;
  339. if (U->isOffsetOf(CTy, FieldNo)) {
  340. OS << "offsetof(" << *CTy << ", ";
  341. FieldNo->printAsOperand(OS, false);
  342. OS << ")";
  343. return;
  344. }
  345. // Otherwise just print it normally.
  346. U->getValue()->printAsOperand(OS, false);
  347. return;
  348. }
  349. case scCouldNotCompute:
  350. OS << "***COULDNOTCOMPUTE***";
  351. return;
  352. }
  353. llvm_unreachable("Unknown SCEV kind!");
  354. }
  355. Type *SCEV::getType() const {
  356. switch (getSCEVType()) {
  357. case scConstant:
  358. return cast<SCEVConstant>(this)->getType();
  359. case scPtrToInt:
  360. case scTruncate:
  361. case scZeroExtend:
  362. case scSignExtend:
  363. return cast<SCEVCastExpr>(this)->getType();
  364. case scAddRecExpr:
  365. return cast<SCEVAddRecExpr>(this)->getType();
  366. case scMulExpr:
  367. return cast<SCEVMulExpr>(this)->getType();
  368. case scUMaxExpr:
  369. case scSMaxExpr:
  370. case scUMinExpr:
  371. case scSMinExpr:
  372. return cast<SCEVMinMaxExpr>(this)->getType();
  373. case scSequentialUMinExpr:
  374. return cast<SCEVSequentialMinMaxExpr>(this)->getType();
  375. case scAddExpr:
  376. return cast<SCEVAddExpr>(this)->getType();
  377. case scUDivExpr:
  378. return cast<SCEVUDivExpr>(this)->getType();
  379. case scUnknown:
  380. return cast<SCEVUnknown>(this)->getType();
  381. case scCouldNotCompute:
  382. llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  383. }
  384. llvm_unreachable("Unknown SCEV kind!");
  385. }
  386. bool SCEV::isZero() const {
  387. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
  388. return SC->getValue()->isZero();
  389. return false;
  390. }
  391. bool SCEV::isOne() const {
  392. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
  393. return SC->getValue()->isOne();
  394. return false;
  395. }
  396. bool SCEV::isAllOnesValue() const {
  397. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
  398. return SC->getValue()->isMinusOne();
  399. return false;
  400. }
  401. bool SCEV::isNonConstantNegative() const {
  402. const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
  403. if (!Mul) return false;
  404. // If there is a constant factor, it will be first.
  405. const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
  406. if (!SC) return false;
  407. // Return true if the value is negative, this matches things like (-42 * V).
  408. return SC->getAPInt().isNegative();
  409. }
  410. SCEVCouldNotCompute::SCEVCouldNotCompute() :
  411. SCEV(FoldingSetNodeIDRef(), scCouldNotCompute, 0) {}
  412. bool SCEVCouldNotCompute::classof(const SCEV *S) {
  413. return S->getSCEVType() == scCouldNotCompute;
  414. }
  415. const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
  416. FoldingSetNodeID ID;
  417. ID.AddInteger(scConstant);
  418. ID.AddPointer(V);
  419. void *IP = nullptr;
  420. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  421. SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
  422. UniqueSCEVs.InsertNode(S, IP);
  423. return S;
  424. }
  425. const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
  426. return getConstant(ConstantInt::get(getContext(), Val));
  427. }
  428. const SCEV *
  429. ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
  430. IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
  431. return getConstant(ConstantInt::get(ITy, V, isSigned));
  432. }
  433. SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
  434. const SCEV *op, Type *ty)
  435. : SCEV(ID, SCEVTy, computeExpressionSize(op)), Ty(ty) {
  436. Operands[0] = op;
  437. }
  438. SCEVPtrToIntExpr::SCEVPtrToIntExpr(const FoldingSetNodeIDRef ID, const SCEV *Op,
  439. Type *ITy)
  440. : SCEVCastExpr(ID, scPtrToInt, Op, ITy) {
  441. assert(getOperand()->getType()->isPointerTy() && Ty->isIntegerTy() &&
  442. "Must be a non-bit-width-changing pointer-to-integer cast!");
  443. }
  444. SCEVIntegralCastExpr::SCEVIntegralCastExpr(const FoldingSetNodeIDRef ID,
  445. SCEVTypes SCEVTy, const SCEV *op,
  446. Type *ty)
  447. : SCEVCastExpr(ID, SCEVTy, op, ty) {}
  448. SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op,
  449. Type *ty)
  450. : SCEVIntegralCastExpr(ID, scTruncate, op, ty) {
  451. assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
  452. "Cannot truncate non-integer value!");
  453. }
  454. SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
  455. const SCEV *op, Type *ty)
  456. : SCEVIntegralCastExpr(ID, scZeroExtend, op, ty) {
  457. assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
  458. "Cannot zero extend non-integer value!");
  459. }
  460. SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
  461. const SCEV *op, Type *ty)
  462. : SCEVIntegralCastExpr(ID, scSignExtend, op, ty) {
  463. assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
  464. "Cannot sign extend non-integer value!");
  465. }
  466. void SCEVUnknown::deleted() {
  467. // Clear this SCEVUnknown from various maps.
  468. SE->forgetMemoizedResults(this);
  469. // Remove this SCEVUnknown from the uniquing map.
  470. SE->UniqueSCEVs.RemoveNode(this);
  471. // Release the value.
  472. setValPtr(nullptr);
  473. }
  474. void SCEVUnknown::allUsesReplacedWith(Value *New) {
  475. // Remove this SCEVUnknown from the uniquing map.
  476. SE->UniqueSCEVs.RemoveNode(this);
  477. // Update this SCEVUnknown to point to the new value. This is needed
  478. // because there may still be outstanding SCEVs which still point to
  479. // this SCEVUnknown.
  480. setValPtr(New);
  481. }
  482. bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
  483. if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
  484. if (VCE->getOpcode() == Instruction::PtrToInt)
  485. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
  486. if (CE->getOpcode() == Instruction::GetElementPtr &&
  487. CE->getOperand(0)->isNullValue() &&
  488. CE->getNumOperands() == 2)
  489. if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
  490. if (CI->isOne()) {
  491. AllocTy = cast<GEPOperator>(CE)->getSourceElementType();
  492. return true;
  493. }
  494. return false;
  495. }
  496. bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
  497. if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
  498. if (VCE->getOpcode() == Instruction::PtrToInt)
  499. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
  500. if (CE->getOpcode() == Instruction::GetElementPtr &&
  501. CE->getOperand(0)->isNullValue()) {
  502. Type *Ty = cast<GEPOperator>(CE)->getSourceElementType();
  503. if (StructType *STy = dyn_cast<StructType>(Ty))
  504. if (!STy->isPacked() &&
  505. CE->getNumOperands() == 3 &&
  506. CE->getOperand(1)->isNullValue()) {
  507. if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
  508. if (CI->isOne() &&
  509. STy->getNumElements() == 2 &&
  510. STy->getElementType(0)->isIntegerTy(1)) {
  511. AllocTy = STy->getElementType(1);
  512. return true;
  513. }
  514. }
  515. }
  516. return false;
  517. }
  518. bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
  519. if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
  520. if (VCE->getOpcode() == Instruction::PtrToInt)
  521. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
  522. if (CE->getOpcode() == Instruction::GetElementPtr &&
  523. CE->getNumOperands() == 3 &&
  524. CE->getOperand(0)->isNullValue() &&
  525. CE->getOperand(1)->isNullValue()) {
  526. Type *Ty = cast<GEPOperator>(CE)->getSourceElementType();
  527. // Ignore vector types here so that ScalarEvolutionExpander doesn't
  528. // emit getelementptrs that index into vectors.
  529. if (Ty->isStructTy() || Ty->isArrayTy()) {
  530. CTy = Ty;
  531. FieldNo = CE->getOperand(2);
  532. return true;
  533. }
  534. }
  535. return false;
  536. }
  537. //===----------------------------------------------------------------------===//
  538. // SCEV Utilities
  539. //===----------------------------------------------------------------------===//
  540. /// Compare the two values \p LV and \p RV in terms of their "complexity" where
  541. /// "complexity" is a partial (and somewhat ad-hoc) relation used to order
  542. /// operands in SCEV expressions. \p EqCache is a set of pairs of values that
  543. /// have been previously deemed to be "equally complex" by this routine. It is
  544. /// intended to avoid exponential time complexity in cases like:
  545. ///
  546. /// %a = f(%x, %y)
  547. /// %b = f(%a, %a)
  548. /// %c = f(%b, %b)
  549. ///
  550. /// %d = f(%x, %y)
  551. /// %e = f(%d, %d)
  552. /// %f = f(%e, %e)
  553. ///
  554. /// CompareValueComplexity(%f, %c)
  555. ///
  556. /// Since we do not continue running this routine on expression trees once we
  557. /// have seen unequal values, there is no need to track them in the cache.
  558. static int
  559. CompareValueComplexity(EquivalenceClasses<const Value *> &EqCacheValue,
  560. const LoopInfo *const LI, Value *LV, Value *RV,
  561. unsigned Depth) {
  562. if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV))
  563. return 0;
  564. // Order pointer values after integer values. This helps SCEVExpander form
  565. // GEPs.
  566. bool LIsPointer = LV->getType()->isPointerTy(),
  567. RIsPointer = RV->getType()->isPointerTy();
  568. if (LIsPointer != RIsPointer)
  569. return (int)LIsPointer - (int)RIsPointer;
  570. // Compare getValueID values.
  571. unsigned LID = LV->getValueID(), RID = RV->getValueID();
  572. if (LID != RID)
  573. return (int)LID - (int)RID;
  574. // Sort arguments by their position.
  575. if (const auto *LA = dyn_cast<Argument>(LV)) {
  576. const auto *RA = cast<Argument>(RV);
  577. unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
  578. return (int)LArgNo - (int)RArgNo;
  579. }
  580. if (const auto *LGV = dyn_cast<GlobalValue>(LV)) {
  581. const auto *RGV = cast<GlobalValue>(RV);
  582. const auto IsGVNameSemantic = [&](const GlobalValue *GV) {
  583. auto LT = GV->getLinkage();
  584. return !(GlobalValue::isPrivateLinkage(LT) ||
  585. GlobalValue::isInternalLinkage(LT));
  586. };
  587. // Use the names to distinguish the two values, but only if the
  588. // names are semantically important.
  589. if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV))
  590. return LGV->getName().compare(RGV->getName());
  591. }
  592. // For instructions, compare their loop depth, and their operand count. This
  593. // is pretty loose.
  594. if (const auto *LInst = dyn_cast<Instruction>(LV)) {
  595. const auto *RInst = cast<Instruction>(RV);
  596. // Compare loop depths.
  597. const BasicBlock *LParent = LInst->getParent(),
  598. *RParent = RInst->getParent();
  599. if (LParent != RParent) {
  600. unsigned LDepth = LI->getLoopDepth(LParent),
  601. RDepth = LI->getLoopDepth(RParent);
  602. if (LDepth != RDepth)
  603. return (int)LDepth - (int)RDepth;
  604. }
  605. // Compare the number of operands.
  606. unsigned LNumOps = LInst->getNumOperands(),
  607. RNumOps = RInst->getNumOperands();
  608. if (LNumOps != RNumOps)
  609. return (int)LNumOps - (int)RNumOps;
  610. for (unsigned Idx : seq(0u, LNumOps)) {
  611. int Result =
  612. CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx),
  613. RInst->getOperand(Idx), Depth + 1);
  614. if (Result != 0)
  615. return Result;
  616. }
  617. }
  618. EqCacheValue.unionSets(LV, RV);
  619. return 0;
  620. }
  621. // Return negative, zero, or positive, if LHS is less than, equal to, or greater
  622. // than RHS, respectively. A three-way result allows recursive comparisons to be
  623. // more efficient.
  624. // If the max analysis depth was reached, return None, assuming we do not know
  625. // if they are equivalent for sure.
  626. static Optional<int>
  627. CompareSCEVComplexity(EquivalenceClasses<const SCEV *> &EqCacheSCEV,
  628. EquivalenceClasses<const Value *> &EqCacheValue,
  629. const LoopInfo *const LI, const SCEV *LHS,
  630. const SCEV *RHS, DominatorTree &DT, unsigned Depth = 0) {
  631. // Fast-path: SCEVs are uniqued so we can do a quick equality check.
  632. if (LHS == RHS)
  633. return 0;
  634. // Primarily, sort the SCEVs by their getSCEVType().
  635. SCEVTypes LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
  636. if (LType != RType)
  637. return (int)LType - (int)RType;
  638. if (EqCacheSCEV.isEquivalent(LHS, RHS))
  639. return 0;
  640. if (Depth > MaxSCEVCompareDepth)
  641. return None;
  642. // Aside from the getSCEVType() ordering, the particular ordering
  643. // isn't very important except that it's beneficial to be consistent,
  644. // so that (a + b) and (b + a) don't end up as different expressions.
  645. switch (LType) {
  646. case scUnknown: {
  647. const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
  648. const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
  649. int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(),
  650. RU->getValue(), Depth + 1);
  651. if (X == 0)
  652. EqCacheSCEV.unionSets(LHS, RHS);
  653. return X;
  654. }
  655. case scConstant: {
  656. const SCEVConstant *LC = cast<SCEVConstant>(LHS);
  657. const SCEVConstant *RC = cast<SCEVConstant>(RHS);
  658. // Compare constant values.
  659. const APInt &LA = LC->getAPInt();
  660. const APInt &RA = RC->getAPInt();
  661. unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
  662. if (LBitWidth != RBitWidth)
  663. return (int)LBitWidth - (int)RBitWidth;
  664. return LA.ult(RA) ? -1 : 1;
  665. }
  666. case scAddRecExpr: {
  667. const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
  668. const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
  669. // There is always a dominance between two recs that are used by one SCEV,
  670. // so we can safely sort recs by loop header dominance. We require such
  671. // order in getAddExpr.
  672. const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
  673. if (LLoop != RLoop) {
  674. const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader();
  675. assert(LHead != RHead && "Two loops share the same header?");
  676. if (DT.dominates(LHead, RHead))
  677. return 1;
  678. else
  679. assert(DT.dominates(RHead, LHead) &&
  680. "No dominance between recurrences used by one SCEV?");
  681. return -1;
  682. }
  683. // Addrec complexity grows with operand count.
  684. unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
  685. if (LNumOps != RNumOps)
  686. return (int)LNumOps - (int)RNumOps;
  687. // Lexicographically compare.
  688. for (unsigned i = 0; i != LNumOps; ++i) {
  689. auto X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
  690. LA->getOperand(i), RA->getOperand(i), DT,
  691. Depth + 1);
  692. if (X != 0)
  693. return X;
  694. }
  695. EqCacheSCEV.unionSets(LHS, RHS);
  696. return 0;
  697. }
  698. case scAddExpr:
  699. case scMulExpr:
  700. case scSMaxExpr:
  701. case scUMaxExpr:
  702. case scSMinExpr:
  703. case scUMinExpr:
  704. case scSequentialUMinExpr: {
  705. const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
  706. const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
  707. // Lexicographically compare n-ary expressions.
  708. unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
  709. if (LNumOps != RNumOps)
  710. return (int)LNumOps - (int)RNumOps;
  711. for (unsigned i = 0; i != LNumOps; ++i) {
  712. auto X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
  713. LC->getOperand(i), RC->getOperand(i), DT,
  714. Depth + 1);
  715. if (X != 0)
  716. return X;
  717. }
  718. EqCacheSCEV.unionSets(LHS, RHS);
  719. return 0;
  720. }
  721. case scUDivExpr: {
  722. const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
  723. const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
  724. // Lexicographically compare udiv expressions.
  725. auto X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(),
  726. RC->getLHS(), DT, Depth + 1);
  727. if (X != 0)
  728. return X;
  729. X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getRHS(),
  730. RC->getRHS(), DT, Depth + 1);
  731. if (X == 0)
  732. EqCacheSCEV.unionSets(LHS, RHS);
  733. return X;
  734. }
  735. case scPtrToInt:
  736. case scTruncate:
  737. case scZeroExtend:
  738. case scSignExtend: {
  739. const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
  740. const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
  741. // Compare cast expressions by operand.
  742. auto X =
  743. CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getOperand(),
  744. RC->getOperand(), DT, Depth + 1);
  745. if (X == 0)
  746. EqCacheSCEV.unionSets(LHS, RHS);
  747. return X;
  748. }
  749. case scCouldNotCompute:
  750. llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  751. }
  752. llvm_unreachable("Unknown SCEV kind!");
  753. }
  754. /// Given a list of SCEV objects, order them by their complexity, and group
  755. /// objects of the same complexity together by value. When this routine is
  756. /// finished, we know that any duplicates in the vector are consecutive and that
  757. /// complexity is monotonically increasing.
  758. ///
  759. /// Note that we go take special precautions to ensure that we get deterministic
  760. /// results from this routine. In other words, we don't want the results of
  761. /// this to depend on where the addresses of various SCEV objects happened to
  762. /// land in memory.
  763. static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
  764. LoopInfo *LI, DominatorTree &DT) {
  765. if (Ops.size() < 2) return; // Noop
  766. EquivalenceClasses<const SCEV *> EqCacheSCEV;
  767. EquivalenceClasses<const Value *> EqCacheValue;
  768. // Whether LHS has provably less complexity than RHS.
  769. auto IsLessComplex = [&](const SCEV *LHS, const SCEV *RHS) {
  770. auto Complexity =
  771. CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT);
  772. return Complexity && *Complexity < 0;
  773. };
  774. if (Ops.size() == 2) {
  775. // This is the common case, which also happens to be trivially simple.
  776. // Special case it.
  777. const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
  778. if (IsLessComplex(RHS, LHS))
  779. std::swap(LHS, RHS);
  780. return;
  781. }
  782. // Do the rough sort by complexity.
  783. llvm::stable_sort(Ops, [&](const SCEV *LHS, const SCEV *RHS) {
  784. return IsLessComplex(LHS, RHS);
  785. });
  786. // Now that we are sorted by complexity, group elements of the same
  787. // complexity. Note that this is, at worst, N^2, but the vector is likely to
  788. // be extremely short in practice. Note that we take this approach because we
  789. // do not want to depend on the addresses of the objects we are grouping.
  790. for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
  791. const SCEV *S = Ops[i];
  792. unsigned Complexity = S->getSCEVType();
  793. // If there are any objects of the same complexity and same value as this
  794. // one, group them.
  795. for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
  796. if (Ops[j] == S) { // Found a duplicate.
  797. // Move it to immediately after i'th element.
  798. std::swap(Ops[i+1], Ops[j]);
  799. ++i; // no need to rescan it.
  800. if (i == e-2) return; // Done!
  801. }
  802. }
  803. }
  804. }
  805. /// Returns true if \p Ops contains a huge SCEV (the subtree of S contains at
  806. /// least HugeExprThreshold nodes).
  807. static bool hasHugeExpression(ArrayRef<const SCEV *> Ops) {
  808. return any_of(Ops, [](const SCEV *S) {
  809. return S->getExpressionSize() >= HugeExprThreshold;
  810. });
  811. }
  812. //===----------------------------------------------------------------------===//
  813. // Simple SCEV method implementations
  814. //===----------------------------------------------------------------------===//
  815. /// Compute BC(It, K). The result has width W. Assume, K > 0.
  816. static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
  817. ScalarEvolution &SE,
  818. Type *ResultTy) {
  819. // Handle the simplest case efficiently.
  820. if (K == 1)
  821. return SE.getTruncateOrZeroExtend(It, ResultTy);
  822. // We are using the following formula for BC(It, K):
  823. //
  824. // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
  825. //
  826. // Suppose, W is the bitwidth of the return value. We must be prepared for
  827. // overflow. Hence, we must assure that the result of our computation is
  828. // equal to the accurate one modulo 2^W. Unfortunately, division isn't
  829. // safe in modular arithmetic.
  830. //
  831. // However, this code doesn't use exactly that formula; the formula it uses
  832. // is something like the following, where T is the number of factors of 2 in
  833. // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
  834. // exponentiation:
  835. //
  836. // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
  837. //
  838. // This formula is trivially equivalent to the previous formula. However,
  839. // this formula can be implemented much more efficiently. The trick is that
  840. // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
  841. // arithmetic. To do exact division in modular arithmetic, all we have
  842. // to do is multiply by the inverse. Therefore, this step can be done at
  843. // width W.
  844. //
  845. // The next issue is how to safely do the division by 2^T. The way this
  846. // is done is by doing the multiplication step at a width of at least W + T
  847. // bits. This way, the bottom W+T bits of the product are accurate. Then,
  848. // when we perform the division by 2^T (which is equivalent to a right shift
  849. // by T), the bottom W bits are accurate. Extra bits are okay; they'll get
  850. // truncated out after the division by 2^T.
  851. //
  852. // In comparison to just directly using the first formula, this technique
  853. // is much more efficient; using the first formula requires W * K bits,
  854. // but this formula less than W + K bits. Also, the first formula requires
  855. // a division step, whereas this formula only requires multiplies and shifts.
  856. //
  857. // It doesn't matter whether the subtraction step is done in the calculation
  858. // width or the input iteration count's width; if the subtraction overflows,
  859. // the result must be zero anyway. We prefer here to do it in the width of
  860. // the induction variable because it helps a lot for certain cases; CodeGen
  861. // isn't smart enough to ignore the overflow, which leads to much less
  862. // efficient code if the width of the subtraction is wider than the native
  863. // register width.
  864. //
  865. // (It's possible to not widen at all by pulling out factors of 2 before
  866. // the multiplication; for example, K=2 can be calculated as
  867. // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
  868. // extra arithmetic, so it's not an obvious win, and it gets
  869. // much more complicated for K > 3.)
  870. // Protection from insane SCEVs; this bound is conservative,
  871. // but it probably doesn't matter.
  872. if (K > 1000)
  873. return SE.getCouldNotCompute();
  874. unsigned W = SE.getTypeSizeInBits(ResultTy);
  875. // Calculate K! / 2^T and T; we divide out the factors of two before
  876. // multiplying for calculating K! / 2^T to avoid overflow.
  877. // Other overflow doesn't matter because we only care about the bottom
  878. // W bits of the result.
  879. APInt OddFactorial(W, 1);
  880. unsigned T = 1;
  881. for (unsigned i = 3; i <= K; ++i) {
  882. APInt Mult(W, i);
  883. unsigned TwoFactors = Mult.countTrailingZeros();
  884. T += TwoFactors;
  885. Mult.lshrInPlace(TwoFactors);
  886. OddFactorial *= Mult;
  887. }
  888. // We need at least W + T bits for the multiplication step
  889. unsigned CalculationBits = W + T;
  890. // Calculate 2^T, at width T+W.
  891. APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
  892. // Calculate the multiplicative inverse of K! / 2^T;
  893. // this multiplication factor will perform the exact division by
  894. // K! / 2^T.
  895. APInt Mod = APInt::getSignedMinValue(W+1);
  896. APInt MultiplyFactor = OddFactorial.zext(W+1);
  897. MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
  898. MultiplyFactor = MultiplyFactor.trunc(W);
  899. // Calculate the product, at width T+W
  900. IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
  901. CalculationBits);
  902. const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
  903. for (unsigned i = 1; i != K; ++i) {
  904. const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
  905. Dividend = SE.getMulExpr(Dividend,
  906. SE.getTruncateOrZeroExtend(S, CalculationTy));
  907. }
  908. // Divide by 2^T
  909. const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
  910. // Truncate the result, and divide by K! / 2^T.
  911. return SE.getMulExpr(SE.getConstant(MultiplyFactor),
  912. SE.getTruncateOrZeroExtend(DivResult, ResultTy));
  913. }
  914. /// Return the value of this chain of recurrences at the specified iteration
  915. /// number. We can evaluate this recurrence by multiplying each element in the
  916. /// chain by the binomial coefficient corresponding to it. In other words, we
  917. /// can evaluate {A,+,B,+,C,+,D} as:
  918. ///
  919. /// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
  920. ///
  921. /// where BC(It, k) stands for binomial coefficient.
  922. const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
  923. ScalarEvolution &SE) const {
  924. return evaluateAtIteration(makeArrayRef(op_begin(), op_end()), It, SE);
  925. }
  926. const SCEV *
  927. SCEVAddRecExpr::evaluateAtIteration(ArrayRef<const SCEV *> Operands,
  928. const SCEV *It, ScalarEvolution &SE) {
  929. assert(Operands.size() > 0);
  930. const SCEV *Result = Operands[0];
  931. for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
  932. // The computation is correct in the face of overflow provided that the
  933. // multiplication is performed _after_ the evaluation of the binomial
  934. // coefficient.
  935. const SCEV *Coeff = BinomialCoefficient(It, i, SE, Result->getType());
  936. if (isa<SCEVCouldNotCompute>(Coeff))
  937. return Coeff;
  938. Result = SE.getAddExpr(Result, SE.getMulExpr(Operands[i], Coeff));
  939. }
  940. return Result;
  941. }
  942. //===----------------------------------------------------------------------===//
  943. // SCEV Expression folder implementations
  944. //===----------------------------------------------------------------------===//
  945. const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op,
  946. unsigned Depth) {
  947. assert(Depth <= 1 &&
  948. "getLosslessPtrToIntExpr() should self-recurse at most once.");
  949. // We could be called with an integer-typed operands during SCEV rewrites.
  950. // Since the operand is an integer already, just perform zext/trunc/self cast.
  951. if (!Op->getType()->isPointerTy())
  952. return Op;
  953. // What would be an ID for such a SCEV cast expression?
  954. FoldingSetNodeID ID;
  955. ID.AddInteger(scPtrToInt);
  956. ID.AddPointer(Op);
  957. void *IP = nullptr;
  958. // Is there already an expression for such a cast?
  959. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
  960. return S;
  961. // It isn't legal for optimizations to construct new ptrtoint expressions
  962. // for non-integral pointers.
  963. if (getDataLayout().isNonIntegralPointerType(Op->getType()))
  964. return getCouldNotCompute();
  965. Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType());
  966. // We can only trivially model ptrtoint if SCEV's effective (integer) type
  967. // is sufficiently wide to represent all possible pointer values.
  968. // We could theoretically teach SCEV to truncate wider pointers, but
  969. // that isn't implemented for now.
  970. if (getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(Op->getType())) !=
  971. getDataLayout().getTypeSizeInBits(IntPtrTy))
  972. return getCouldNotCompute();
  973. // If not, is this expression something we can't reduce any further?
  974. if (auto *U = dyn_cast<SCEVUnknown>(Op)) {
  975. // Perform some basic constant folding. If the operand of the ptr2int cast
  976. // is a null pointer, don't create a ptr2int SCEV expression (that will be
  977. // left as-is), but produce a zero constant.
  978. // NOTE: We could handle a more general case, but lack motivational cases.
  979. if (isa<ConstantPointerNull>(U->getValue()))
  980. return getZero(IntPtrTy);
  981. // Create an explicit cast node.
  982. // We can reuse the existing insert position since if we get here,
  983. // we won't have made any changes which would invalidate it.
  984. SCEV *S = new (SCEVAllocator)
  985. SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy);
  986. UniqueSCEVs.InsertNode(S, IP);
  987. registerUser(S, Op);
  988. return S;
  989. }
  990. assert(Depth == 0 && "getLosslessPtrToIntExpr() should not self-recurse for "
  991. "non-SCEVUnknown's.");
  992. // Otherwise, we've got some expression that is more complex than just a
  993. // single SCEVUnknown. But we don't want to have a SCEVPtrToIntExpr of an
  994. // arbitrary expression, we want to have SCEVPtrToIntExpr of an SCEVUnknown
  995. // only, and the expressions must otherwise be integer-typed.
  996. // So sink the cast down to the SCEVUnknown's.
  997. /// The SCEVPtrToIntSinkingRewriter takes a scalar evolution expression,
  998. /// which computes a pointer-typed value, and rewrites the whole expression
  999. /// tree so that *all* the computations are done on integers, and the only
  1000. /// pointer-typed operands in the expression are SCEVUnknown.
  1001. class SCEVPtrToIntSinkingRewriter
  1002. : public SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter> {
  1003. using Base = SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter>;
  1004. public:
  1005. SCEVPtrToIntSinkingRewriter(ScalarEvolution &SE) : SCEVRewriteVisitor(SE) {}
  1006. static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE) {
  1007. SCEVPtrToIntSinkingRewriter Rewriter(SE);
  1008. return Rewriter.visit(Scev);
  1009. }
  1010. const SCEV *visit(const SCEV *S) {
  1011. Type *STy = S->getType();
  1012. // If the expression is not pointer-typed, just keep it as-is.
  1013. if (!STy->isPointerTy())
  1014. return S;
  1015. // Else, recursively sink the cast down into it.
  1016. return Base::visit(S);
  1017. }
  1018. const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
  1019. SmallVector<const SCEV *, 2> Operands;
  1020. bool Changed = false;
  1021. for (auto *Op : Expr->operands()) {
  1022. Operands.push_back(visit(Op));
  1023. Changed |= Op != Operands.back();
  1024. }
  1025. return !Changed ? Expr : SE.getAddExpr(Operands, Expr->getNoWrapFlags());
  1026. }
  1027. const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
  1028. SmallVector<const SCEV *, 2> Operands;
  1029. bool Changed = false;
  1030. for (auto *Op : Expr->operands()) {
  1031. Operands.push_back(visit(Op));
  1032. Changed |= Op != Operands.back();
  1033. }
  1034. return !Changed ? Expr : SE.getMulExpr(Operands, Expr->getNoWrapFlags());
  1035. }
  1036. const SCEV *visitUnknown(const SCEVUnknown *Expr) {
  1037. assert(Expr->getType()->isPointerTy() &&
  1038. "Should only reach pointer-typed SCEVUnknown's.");
  1039. return SE.getLosslessPtrToIntExpr(Expr, /*Depth=*/1);
  1040. }
  1041. };
  1042. // And actually perform the cast sinking.
  1043. const SCEV *IntOp = SCEVPtrToIntSinkingRewriter::rewrite(Op, *this);
  1044. assert(IntOp->getType()->isIntegerTy() &&
  1045. "We must have succeeded in sinking the cast, "
  1046. "and ending up with an integer-typed expression!");
  1047. return IntOp;
  1048. }
  1049. const SCEV *ScalarEvolution::getPtrToIntExpr(const SCEV *Op, Type *Ty) {
  1050. assert(Ty->isIntegerTy() && "Target type must be an integer type!");
  1051. const SCEV *IntOp = getLosslessPtrToIntExpr(Op);
  1052. if (isa<SCEVCouldNotCompute>(IntOp))
  1053. return IntOp;
  1054. return getTruncateOrZeroExtend(IntOp, Ty);
  1055. }
  1056. const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
  1057. unsigned Depth) {
  1058. assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
  1059. "This is not a truncating conversion!");
  1060. assert(isSCEVable(Ty) &&
  1061. "This is not a conversion to a SCEVable type!");
  1062. assert(!Op->getType()->isPointerTy() && "Can't truncate pointer!");
  1063. Ty = getEffectiveSCEVType(Ty);
  1064. FoldingSetNodeID ID;
  1065. ID.AddInteger(scTruncate);
  1066. ID.AddPointer(Op);
  1067. ID.AddPointer(Ty);
  1068. void *IP = nullptr;
  1069. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  1070. // Fold if the operand is constant.
  1071. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
  1072. return getConstant(
  1073. cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
  1074. // trunc(trunc(x)) --> trunc(x)
  1075. if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
  1076. return getTruncateExpr(ST->getOperand(), Ty, Depth + 1);
  1077. // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
  1078. if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
  1079. return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1);
  1080. // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
  1081. if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
  1082. return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1);
  1083. if (Depth > MaxCastDepth) {
  1084. SCEV *S =
  1085. new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty);
  1086. UniqueSCEVs.InsertNode(S, IP);
  1087. registerUser(S, Op);
  1088. return S;
  1089. }
  1090. // trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and
  1091. // trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN),
  1092. // if after transforming we have at most one truncate, not counting truncates
  1093. // that replace other casts.
  1094. if (isa<SCEVAddExpr>(Op) || isa<SCEVMulExpr>(Op)) {
  1095. auto *CommOp = cast<SCEVCommutativeExpr>(Op);
  1096. SmallVector<const SCEV *, 4> Operands;
  1097. unsigned numTruncs = 0;
  1098. for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
  1099. ++i) {
  1100. const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1);
  1101. if (!isa<SCEVIntegralCastExpr>(CommOp->getOperand(i)) &&
  1102. isa<SCEVTruncateExpr>(S))
  1103. numTruncs++;
  1104. Operands.push_back(S);
  1105. }
  1106. if (numTruncs < 2) {
  1107. if (isa<SCEVAddExpr>(Op))
  1108. return getAddExpr(Operands);
  1109. else if (isa<SCEVMulExpr>(Op))
  1110. return getMulExpr(Operands);
  1111. else
  1112. llvm_unreachable("Unexpected SCEV type for Op.");
  1113. }
  1114. // Although we checked in the beginning that ID is not in the cache, it is
  1115. // possible that during recursion and different modification ID was inserted
  1116. // into the cache. So if we find it, just return it.
  1117. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
  1118. return S;
  1119. }
  1120. // If the input value is a chrec scev, truncate the chrec's operands.
  1121. if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
  1122. SmallVector<const SCEV *, 4> Operands;
  1123. for (const SCEV *Op : AddRec->operands())
  1124. Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1));
  1125. return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
  1126. }
  1127. // Return zero if truncating to known zeros.
  1128. uint32_t MinTrailingZeros = GetMinTrailingZeros(Op);
  1129. if (MinTrailingZeros >= getTypeSizeInBits(Ty))
  1130. return getZero(Ty);
  1131. // The cast wasn't folded; create an explicit cast node. We can reuse
  1132. // the existing insert position since if we get here, we won't have
  1133. // made any changes which would invalidate it.
  1134. SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
  1135. Op, Ty);
  1136. UniqueSCEVs.InsertNode(S, IP);
  1137. registerUser(S, Op);
  1138. return S;
  1139. }
  1140. // Get the limit of a recurrence such that incrementing by Step cannot cause
  1141. // signed overflow as long as the value of the recurrence within the
  1142. // loop does not exceed this limit before incrementing.
  1143. static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
  1144. ICmpInst::Predicate *Pred,
  1145. ScalarEvolution *SE) {
  1146. unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  1147. if (SE->isKnownPositive(Step)) {
  1148. *Pred = ICmpInst::ICMP_SLT;
  1149. return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
  1150. SE->getSignedRangeMax(Step));
  1151. }
  1152. if (SE->isKnownNegative(Step)) {
  1153. *Pred = ICmpInst::ICMP_SGT;
  1154. return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
  1155. SE->getSignedRangeMin(Step));
  1156. }
  1157. return nullptr;
  1158. }
  1159. // Get the limit of a recurrence such that incrementing by Step cannot cause
  1160. // unsigned overflow as long as the value of the recurrence within the loop does
  1161. // not exceed this limit before incrementing.
  1162. static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
  1163. ICmpInst::Predicate *Pred,
  1164. ScalarEvolution *SE) {
  1165. unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  1166. *Pred = ICmpInst::ICMP_ULT;
  1167. return SE->getConstant(APInt::getMinValue(BitWidth) -
  1168. SE->getUnsignedRangeMax(Step));
  1169. }
  1170. namespace {
  1171. struct ExtendOpTraitsBase {
  1172. typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *,
  1173. unsigned);
  1174. };
  1175. // Used to make code generic over signed and unsigned overflow.
  1176. template <typename ExtendOp> struct ExtendOpTraits {
  1177. // Members present:
  1178. //
  1179. // static const SCEV::NoWrapFlags WrapType;
  1180. //
  1181. // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
  1182. //
  1183. // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
  1184. // ICmpInst::Predicate *Pred,
  1185. // ScalarEvolution *SE);
  1186. };
  1187. template <>
  1188. struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
  1189. static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
  1190. static const GetExtendExprTy GetExtendExpr;
  1191. static const SCEV *getOverflowLimitForStep(const SCEV *Step,
  1192. ICmpInst::Predicate *Pred,
  1193. ScalarEvolution *SE) {
  1194. return getSignedOverflowLimitForStep(Step, Pred, SE);
  1195. }
  1196. };
  1197. const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
  1198. SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
  1199. template <>
  1200. struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
  1201. static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
  1202. static const GetExtendExprTy GetExtendExpr;
  1203. static const SCEV *getOverflowLimitForStep(const SCEV *Step,
  1204. ICmpInst::Predicate *Pred,
  1205. ScalarEvolution *SE) {
  1206. return getUnsignedOverflowLimitForStep(Step, Pred, SE);
  1207. }
  1208. };
  1209. const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
  1210. SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
  1211. } // end anonymous namespace

// The recurrence AR has been shown to have no signed/unsigned wrap or something
// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
// easily prove NSW/NUW for its pre-increment or post-increment sibling. This
// allows normalizing a sign/zero extended AddRec as such:
//   {sext/zext(Step + Start),+,Step} => {Step + sext/zext(Start),+,Step}
// As a result, the expression "Step + sext/zext(PreIncAR)" is congruent with
// "sext/zext(PostIncAR)".
  1219. template <typename ExtendOpTy>
  1220. static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
  1221. ScalarEvolution *SE, unsigned Depth) {
  1222. auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
  1223. auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
  1224. const Loop *L = AR->getLoop();
  1225. const SCEV *Start = AR->getStart();
  1226. const SCEV *Step = AR->getStepRecurrence(*SE);
  1227. // Check for a simple looking step prior to loop entry.
  1228. const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
  1229. if (!SA)
  1230. return nullptr;
  1231. // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
  1232. // subtraction is expensive. For this purpose, perform a quick and dirty
  1233. // difference, by checking for Step in the operand list.
  1234. SmallVector<const SCEV *, 4> DiffOps;
  1235. for (const SCEV *Op : SA->operands())
  1236. if (Op != Step)
  1237. DiffOps.push_back(Op);
  1238. if (DiffOps.size() == SA->getNumOperands())
  1239. return nullptr;
  1240. // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
  1241. // `Step`:
  1242. // 1. NSW/NUW flags on the step increment.
  1243. auto PreStartFlags =
  1244. ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
  1245. const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
  1246. const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
  1247. SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
  1248. // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
  1249. // "S+X does not sign/unsign-overflow".
  1250. //
  1251. const SCEV *BECount = SE->getBackedgeTakenCount(L);
  1252. if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
  1253. !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
  1254. return PreStart;
  1255. // 2. Direct overflow check on the step operation's expression.
  1256. unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
  1257. Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
  1258. const SCEV *OperandExtendedStart =
  1259. SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth),
  1260. (SE->*GetExtendExpr)(Step, WideTy, Depth));
  1261. if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) {
  1262. if (PreAR && AR->getNoWrapFlags(WrapType)) {
  1263. // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
  1264. // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
  1265. // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
  1266. SE->setNoWrapFlags(const_cast<SCEVAddRecExpr *>(PreAR), WrapType);
  1267. }
  1268. return PreStart;
  1269. }
  1270. // 3. Loop precondition.
  1271. ICmpInst::Predicate Pred;
  1272. const SCEV *OverflowLimit =
  1273. ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
  1274. if (OverflowLimit &&
  1275. SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
  1276. return PreStart;
  1277. return nullptr;
  1278. }
  1279. // Get the normalized zero or sign extended expression for this AddRec's Start.
  1280. template <typename ExtendOpTy>
  1281. static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
  1282. ScalarEvolution *SE,
  1283. unsigned Depth) {
  1284. auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
  1285. const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth);
  1286. if (!PreStart)
  1287. return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth);
  1288. return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty,
  1289. Depth),
  1290. (SE->*GetExtendExpr)(PreStart, Ty, Depth));
  1291. }

// Try to prove away overflow by looking at "nearby" add recurrences. A
// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
//
// Formally:
//
//   {S,+,X} == {S-T,+,X} + T
//    => Ext({S,+,X}) == Ext({S-T,+,X} + T)
//
//   If ({S-T,+,X} + T) does not overflow ... (1)
//
//    RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
//
//   If {S-T,+,X} does not overflow ... (2)
//
//    RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
//        == {Ext(S-T)+Ext(T),+,Ext(X)}
//
//   If (S-T)+T does not overflow ... (3)
//
//    RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
//        == {Ext(S),+,Ext(X)} == LHS
//
// Thus, if (1), (2) and (3) are true for some T, then
//   Ext({S,+,X}) == {Ext(S),+,Ext(X)}
//
// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
// does not overflow" restricted to the 0th iteration. Therefore we only need
// to check for (1) and (2).
//
// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
// is `Delta` (defined below).
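// Continuing the motivating example above (illustrative values): Start = 1,
// Step = 4 and Delta = 1 give PreStart = 0 and PreAR = {0,+,4}. If {0,+,4} is
// already known <nuw> (condition (2)) and is known ult the overflow limit for
// a step of 1 (condition (1)), then {1,+,4} == {0,+,4} + 1 is <nuw> as well.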
  1324. template <typename ExtendOpTy>
  1325. bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
  1326. const SCEV *Step,
  1327. const Loop *L) {
  1328. auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
  1329. // We restrict `Start` to a constant to prevent SCEV from spending too much
  1330. // time here. It is correct (but more expensive) to continue with a
  1331. // non-constant `Start` and do a general SCEV subtraction to compute
  1332. // `PreStart` below.
  1333. const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
  1334. if (!StartC)
  1335. return false;
  1336. APInt StartAI = StartC->getAPInt();
  1337. for (unsigned Delta : {-2, -1, 1, 2}) {
  1338. const SCEV *PreStart = getConstant(StartAI - Delta);
  1339. FoldingSetNodeID ID;
  1340. ID.AddInteger(scAddRecExpr);
  1341. ID.AddPointer(PreStart);
  1342. ID.AddPointer(Step);
  1343. ID.AddPointer(L);
  1344. void *IP = nullptr;
  1345. const auto *PreAR =
  1346. static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  1347. // Give up if we don't already have the add recurrence we need because
  1348. // actually constructing an add recurrence is relatively expensive.
  1349. if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
  1350. const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
  1351. ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
  1352. const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
  1353. DeltaS, &Pred, this);
  1354. if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1)
  1355. return true;
  1356. }
  1357. }
  1358. return false;
  1359. }

// Finds an integer D for an expression (C + x + y + ...) such that the top
// level addition in (D + (C - D + x + y + ...)) would not wrap (signed or
// unsigned) and the number of trailing zeros of (C - D + x + y + ...) is
// maximized, where C is the \p ConstantTerm, x, y, ... are arbitrary SCEVs, and
// the (C + x + y + ...) expression is \p WholeAddExpr.
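// Illustrative example (hypothetical operands): for (11 + 4*%a + 8*%b) the
// non-constant operands have at least two trailing zero bits, so D = 11 mod 4
// = 3 and the expression splits as 3 + (8 + 4*%a + 8*%b); the inner sum is a
// multiple of 4 while D < 4, so the outer addition cannot carry and therefore
// cannot wrap.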
static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
                                            const SCEVConstant *ConstantTerm,
                                            const SCEVAddExpr *WholeAddExpr) {
  const APInt &C = ConstantTerm->getAPInt();
  const unsigned BitWidth = C.getBitWidth();
  // Find number of trailing zeros of (x + y + ...) w/o the C first:
  uint32_t TZ = BitWidth;
  for (unsigned I = 1, E = WholeAddExpr->getNumOperands(); I < E && TZ; ++I)
    TZ = std::min(TZ, SE.GetMinTrailingZeros(WholeAddExpr->getOperand(I)));
  if (TZ) {
    // Set D to be as many least significant bits of C as possible while still
    // guaranteeing that adding D to (C - D + x + y + ...) won't cause a wrap:
    return TZ < BitWidth ? C.trunc(TZ).zext(BitWidth) : C;
  }
  return APInt(BitWidth, 0);
}

// Finds an integer D for an affine AddRec expression {C,+,x} such that the top
// level addition in (D + {C-D,+,x}) would not wrap (signed or unsigned) and the
// number of trailing zeros of (C - D + x * n) is maximized, where C is the \p
// ConstantStart, x is an arbitrary \p Step, and n is the loop trip count.
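// For instance (hypothetical values): with C = 5 and a Step of 8, every
// increment is a multiple of 8, so D = 5 and {5,+,8} splits as 5 + {0,+,8}
// without the outer addition wrapping.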
static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
                                            const APInt &ConstantStart,
                                            const SCEV *Step) {
  const unsigned BitWidth = ConstantStart.getBitWidth();
  const uint32_t TZ = SE.GetMinTrailingZeros(Step);
  if (TZ)
    return TZ < BitWidth ? ConstantStart.trunc(TZ).zext(BitWidth)
                         : ConstantStart;
  return APInt(BitWidth, 0);
}
  1395. const SCEV *
  1396. ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
  1397. assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
  1398. "This is not an extending conversion!");
  1399. assert(isSCEVable(Ty) &&
  1400. "This is not a conversion to a SCEVable type!");
  1401. assert(!Op->getType()->isPointerTy() && "Can't extend pointer!");
  1402. Ty = getEffectiveSCEVType(Ty);
  1403. // Fold if the operand is constant.
  1404. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
  1405. return getConstant(
  1406. cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
  1407. // zext(zext(x)) --> zext(x)
  1408. if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
  1409. return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
  1410. // Before doing any expensive analysis, check to see if we've already
  1411. // computed a SCEV for this Op and Ty.
  1412. FoldingSetNodeID ID;
  1413. ID.AddInteger(scZeroExtend);
  1414. ID.AddPointer(Op);
  1415. ID.AddPointer(Ty);
  1416. void *IP = nullptr;
  1417. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  1418. if (Depth > MaxCastDepth) {
  1419. SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
  1420. Op, Ty);
  1421. UniqueSCEVs.InsertNode(S, IP);
  1422. registerUser(S, Op);
  1423. return S;
  1424. }
  1425. // zext(trunc(x)) --> zext(x) or x or trunc(x)
  1426. if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
  1427. // It's possible the bits taken off by the truncate were all zero bits. If
  1428. // so, we should be able to simplify this further.
  1429. const SCEV *X = ST->getOperand();
  1430. ConstantRange CR = getUnsignedRange(X);
  1431. unsigned TruncBits = getTypeSizeInBits(ST->getType());
  1432. unsigned NewBits = getTypeSizeInBits(Ty);
  1433. if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
  1434. CR.zextOrTrunc(NewBits)))
  1435. return getTruncateOrZeroExtend(X, Ty, Depth);
  1436. }
  1437. // If the input value is a chrec scev, and we can prove that the value
  1438. // did not overflow the old, smaller, value, we can zero extend all of the
  1439. // operands (often constants). This allows analysis of something like
  1440. // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
  1441. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
  1442. if (AR->isAffine()) {
  1443. const SCEV *Start = AR->getStart();
  1444. const SCEV *Step = AR->getStepRecurrence(*this);
  1445. unsigned BitWidth = getTypeSizeInBits(AR->getType());
  1446. const Loop *L = AR->getLoop();
  1447. if (!AR->hasNoUnsignedWrap()) {
  1448. auto NewFlags = proveNoWrapViaConstantRanges(AR);
  1449. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
  1450. }
  1451. // If we have special knowledge that this addrec won't overflow,
  1452. // we don't need to do any further analysis.
  1453. if (AR->hasNoUnsignedWrap())
  1454. return getAddRecExpr(
  1455. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
  1456. getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
      // Check whether the backedge-taken count is SCEVCouldNotCompute.
      // Note that this serves two purposes: It filters out loops that are
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion. In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
  1465. const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
  1466. if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
  1467. // Manually compute the final value for AR, checking for overflow.
  1468. // Check whether the backedge-taken count can be losslessly casted to
  1469. // the addrec's type. The count is always unsigned.
  1470. const SCEV *CastedMaxBECount =
  1471. getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
  1472. const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
  1473. CastedMaxBECount, MaxBECount->getType(), Depth);
  1474. if (MaxBECount == RecastedMaxBECount) {
  1475. Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
  1476. // Check whether Start+Step*MaxBECount has no unsigned overflow.
  1477. const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step,
  1478. SCEV::FlagAnyWrap, Depth + 1);
  1479. const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul,
  1480. SCEV::FlagAnyWrap,
  1481. Depth + 1),
  1482. WideTy, Depth + 1);
  1483. const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1);
  1484. const SCEV *WideMaxBECount =
  1485. getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
  1486. const SCEV *OperandExtendedAdd =
  1487. getAddExpr(WideStart,
  1488. getMulExpr(WideMaxBECount,
  1489. getZeroExtendExpr(Step, WideTy, Depth + 1),
  1490. SCEV::FlagAnyWrap, Depth + 1),
  1491. SCEV::FlagAnyWrap, Depth + 1);
  1492. if (ZAdd == OperandExtendedAdd) {
  1493. // Cache knowledge of AR NUW, which is propagated to this AddRec.
  1494. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
  1495. // Return the expression with the addrec on the outside.
  1496. return getAddRecExpr(
  1497. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
  1498. Depth + 1),
  1499. getZeroExtendExpr(Step, Ty, Depth + 1), L,
  1500. AR->getNoWrapFlags());
  1501. }
  1502. // Similar to above, only this time treat the step value as signed.
  1503. // This covers loops that count down.
  1504. OperandExtendedAdd =
  1505. getAddExpr(WideStart,
  1506. getMulExpr(WideMaxBECount,
  1507. getSignExtendExpr(Step, WideTy, Depth + 1),
  1508. SCEV::FlagAnyWrap, Depth + 1),
  1509. SCEV::FlagAnyWrap, Depth + 1);
  1510. if (ZAdd == OperandExtendedAdd) {
  1511. // Cache knowledge of AR NW, which is propagated to this AddRec.
  1512. // Negative step causes unsigned wrap, but it still can't self-wrap.
  1513. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
  1514. // Return the expression with the addrec on the outside.
  1515. return getAddRecExpr(
  1516. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
  1517. Depth + 1),
  1518. getSignExtendExpr(Step, Ty, Depth + 1), L,
  1519. AR->getNoWrapFlags());
  1520. }
  1521. }
  1522. }
  1523. // Normally, in the cases we can prove no-overflow via a
  1524. // backedge guarding condition, we can also compute a backedge
  1525. // taken count for the loop. The exceptions are assumptions and
  1526. // guards present in the loop -- SCEV is not great at exploiting
  1527. // these to compute max backedge taken counts, but can still use
  1528. // these to prove lack of overflow. Use this fact to avoid
  1529. // doing extra work that may not pay off.
  1530. if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
  1531. !AC.assumptions().empty()) {
  1532. auto NewFlags = proveNoUnsignedWrapViaInduction(AR);
  1533. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
  1534. if (AR->hasNoUnsignedWrap()) {
          // Same as the nuw case above - duplicated here to avoid a compile
          // time issue. It's not clear that the order of the checks matters,
          // but it is one of two possible causes of an issue with a change
          // that was reverted. Be conservative for the moment.
  1539. return getAddRecExpr(
  1540. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
  1541. Depth + 1),
  1542. getZeroExtendExpr(Step, Ty, Depth + 1), L,
  1543. AR->getNoWrapFlags());
  1544. }
  1545. // For a negative step, we can extend the operands iff doing so only
  1546. // traverses values in the range zext([0,UINT_MAX]).
  1547. if (isKnownNegative(Step)) {
  1548. const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
  1549. getSignedRangeMin(Step));
  1550. if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
  1551. isKnownOnEveryIteration(ICmpInst::ICMP_UGT, AR, N)) {
  1552. // Cache knowledge of AR NW, which is propagated to this
  1553. // AddRec. Negative step causes unsigned wrap, but it
  1554. // still can't self-wrap.
  1555. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
  1556. // Return the expression with the addrec on the outside.
  1557. return getAddRecExpr(
  1558. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
  1559. Depth + 1),
  1560. getSignExtendExpr(Step, Ty, Depth + 1), L,
  1561. AR->getNoWrapFlags());
  1562. }
  1563. }
  1564. }
      // zext({C,+,Step}) --> (zext(D) + zext({C-D,+,Step}))<nuw><nsw>
      // if D + (C - D + Step * n) can be proven not to wrap unsigned,
      // where D maximizes the number of trailing zeros of (C - D + Step * n).
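      // For instance (illustrative values): with Start = 7 and Step = 4,
      // D = 3, and zext({7,+,4}) becomes (zext(3) + zext({4,+,4}))<nuw><nsw>;
      // every value of {4,+,4} is a multiple of 4, so adding 3 back cannot
      // wrap.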
  1568. if (const auto *SC = dyn_cast<SCEVConstant>(Start)) {
  1569. const APInt &C = SC->getAPInt();
  1570. const APInt &D = extractConstantWithoutWrapping(*this, C, Step);
  1571. if (D != 0) {
  1572. const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth);
  1573. const SCEV *SResidual =
  1574. getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags());
  1575. const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1);
  1576. return getAddExpr(SZExtD, SZExtR,
  1577. (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
  1578. Depth + 1);
  1579. }
  1580. }
  1581. if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
  1582. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
  1583. return getAddRecExpr(
  1584. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
  1585. getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
  1586. }
  1587. }
  1588. // zext(A % B) --> zext(A) % zext(B)
  1589. {
  1590. const SCEV *LHS;
  1591. const SCEV *RHS;
  1592. if (matchURem(Op, LHS, RHS))
  1593. return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1),
  1594. getZeroExtendExpr(RHS, Ty, Depth + 1));
  1595. }
  1596. // zext(A / B) --> zext(A) / zext(B).
  1597. if (auto *Div = dyn_cast<SCEVUDivExpr>(Op))
  1598. return getUDivExpr(getZeroExtendExpr(Div->getLHS(), Ty, Depth + 1),
  1599. getZeroExtendExpr(Div->getRHS(), Ty, Depth + 1));
  1600. if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
  1601. // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw>
  1602. if (SA->hasNoUnsignedWrap()) {
  1603. // If the addition does not unsign overflow then we can, by definition,
  1604. // commute the zero extension with the addition operation.
  1605. SmallVector<const SCEV *, 4> Ops;
  1606. for (const auto *Op : SA->operands())
  1607. Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
  1608. return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1);
  1609. }
    // zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...))
    // if D + (C - D + x + y + ...) can be proven not to wrap unsigned,
    // where D maximizes the number of trailing zeros of (C - D + x + y + ...).
    //
    // Address arithmetic often contains expressions like
    // (zext (add (shl X, C1), C2)), for instance, (zext (5 + (4 * X))).
    // This transformation is useful while proving that such expressions are
    // equal or differ by a small constant amount; see the LoadStoreVectorizer
    // pass.
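    // Completing that example (illustrative values): for (zext (5 + (4 * X)))
    // we get D = 1, so it is rewritten as (zext(1) + zext(4 + 4*X)); a related
    // expression such as (zext (6 + (4 * X))) becomes
    // (zext(2) + zext(4 + 4*X)), which exposes the common subexpression.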
  1618. if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
  1619. const APInt &D = extractConstantWithoutWrapping(*this, SC, SA);
  1620. if (D != 0) {
  1621. const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth);
  1622. const SCEV *SResidual =
  1623. getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth);
  1624. const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1);
  1625. return getAddExpr(SZExtD, SZExtR,
  1626. (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
  1627. Depth + 1);
  1628. }
  1629. }
  1630. }
  1631. if (auto *SM = dyn_cast<SCEVMulExpr>(Op)) {
  1632. // zext((A * B * ...)<nuw>) --> (zext(A) * zext(B) * ...)<nuw>
  1633. if (SM->hasNoUnsignedWrap()) {
  1634. // If the multiply does not unsign overflow then we can, by definition,
  1635. // commute the zero extension with the multiply operation.
  1636. SmallVector<const SCEV *, 4> Ops;
  1637. for (const auto *Op : SM->operands())
  1638. Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
  1639. return getMulExpr(Ops, SCEV::FlagNUW, Depth + 1);
  1640. }
  1641. // zext(2^K * (trunc X to iN)) to iM ->
  1642. // 2^K * (zext(trunc X to i{N-K}) to iM)<nuw>
  1643. //
  1644. // Proof:
  1645. //
  1646. // zext(2^K * (trunc X to iN)) to iM
  1647. // = zext((trunc X to iN) << K) to iM
  1648. // = zext((trunc X to i{N-K}) << K)<nuw> to iM
  1649. // (because shl removes the top K bits)
  1650. // = zext((2^K * (trunc X to i{N-K}))<nuw>) to iM
  1651. // = (2^K * (zext(trunc X to i{N-K}) to iM))<nuw>.
  1652. //
  1653. if (SM->getNumOperands() == 2)
  1654. if (auto *MulLHS = dyn_cast<SCEVConstant>(SM->getOperand(0)))
  1655. if (MulLHS->getAPInt().isPowerOf2())
  1656. if (auto *TruncRHS = dyn_cast<SCEVTruncateExpr>(SM->getOperand(1))) {
  1657. int NewTruncBits = getTypeSizeInBits(TruncRHS->getType()) -
  1658. MulLHS->getAPInt().logBase2();
  1659. Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits);
  1660. return getMulExpr(
  1661. getZeroExtendExpr(MulLHS, Ty),
  1662. getZeroExtendExpr(
  1663. getTruncateExpr(TruncRHS->getOperand(), NewTruncTy), Ty),
  1664. SCEV::FlagNUW, Depth + 1);
  1665. }
  1666. }
  1667. // The cast wasn't folded; create an explicit cast node.
  1668. // Recompute the insert position, as it may have been invalidated.
  1669. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  1670. SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
  1671. Op, Ty);
  1672. UniqueSCEVs.InsertNode(S, IP);
  1673. registerUser(S, Op);
  1674. return S;
  1675. }
  1676. const SCEV *
  1677. ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
  1678. assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
  1679. "This is not an extending conversion!");
  1680. assert(isSCEVable(Ty) &&
  1681. "This is not a conversion to a SCEVable type!");
  1682. assert(!Op->getType()->isPointerTy() && "Can't extend pointer!");
  1683. Ty = getEffectiveSCEVType(Ty);
  1684. // Fold if the operand is constant.
  1685. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
  1686. return getConstant(
  1687. cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
  1688. // sext(sext(x)) --> sext(x)
  1689. if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
  1690. return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1);
  1691. // sext(zext(x)) --> zext(x)
  1692. if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
  1693. return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
  1694. // Before doing any expensive analysis, check to see if we've already
  1695. // computed a SCEV for this Op and Ty.
  1696. FoldingSetNodeID ID;
  1697. ID.AddInteger(scSignExtend);
  1698. ID.AddPointer(Op);
  1699. ID.AddPointer(Ty);
  1700. void *IP = nullptr;
  1701. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  1702. // Limit recursion depth.
  1703. if (Depth > MaxCastDepth) {
  1704. SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
  1705. Op, Ty);
  1706. UniqueSCEVs.InsertNode(S, IP);
  1707. registerUser(S, Op);
  1708. return S;
  1709. }
  1710. // sext(trunc(x)) --> sext(x) or x or trunc(x)
  1711. if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
  1712. // It's possible the bits taken off by the truncate were all sign bits. If
  1713. // so, we should be able to simplify this further.
  1714. const SCEV *X = ST->getOperand();
  1715. ConstantRange CR = getSignedRange(X);
  1716. unsigned TruncBits = getTypeSizeInBits(ST->getType());
  1717. unsigned NewBits = getTypeSizeInBits(Ty);
  1718. if (CR.truncate(TruncBits).signExtend(NewBits).contains(
  1719. CR.sextOrTrunc(NewBits)))
  1720. return getTruncateOrSignExtend(X, Ty, Depth);
  1721. }
  1722. if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
  1723. // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
  1724. if (SA->hasNoSignedWrap()) {
  1725. // If the addition does not sign overflow then we can, by definition,
  1726. // commute the sign extension with the addition operation.
  1727. SmallVector<const SCEV *, 4> Ops;
  1728. for (const auto *Op : SA->operands())
  1729. Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1));
  1730. return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1);
  1731. }
  1732. // sext(C + x + y + ...) --> (sext(D) + sext((C - D) + x + y + ...))
  1733. // if D + (C - D + x + y + ...) could be proven to not signed wrap
  1734. // where D maximizes the number of trailing zeros of (C - D + x + y + ...)
  1735. //
  1736. // For instance, this will bring two seemingly different expressions:
  1737. // 1 + sext(5 + 20 * %x + 24 * %y) and
  1738. // sext(6 + 20 * %x + 24 * %y)
  1739. // to the same form:
  1740. // 2 + sext(4 + 20 * %x + 24 * %y)
  1741. if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
  1742. const APInt &D = extractConstantWithoutWrapping(*this, SC, SA);
  1743. if (D != 0) {
  1744. const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth);
  1745. const SCEV *SResidual =
  1746. getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth);
  1747. const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1);
  1748. return getAddExpr(SSExtD, SSExtR,
  1749. (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
  1750. Depth + 1);
  1751. }
  1752. }
  1753. }
  1754. // If the input value is a chrec scev, and we can prove that the value
  1755. // did not overflow the old, smaller, value, we can sign extend all of the
  1756. // operands (often constants). This allows analysis of something like
  1757. // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
  1758. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
  1759. if (AR->isAffine()) {
  1760. const SCEV *Start = AR->getStart();
  1761. const SCEV *Step = AR->getStepRecurrence(*this);
  1762. unsigned BitWidth = getTypeSizeInBits(AR->getType());
  1763. const Loop *L = AR->getLoop();
  1764. if (!AR->hasNoSignedWrap()) {
  1765. auto NewFlags = proveNoWrapViaConstantRanges(AR);
  1766. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
  1767. }
  1768. // If we have special knowledge that this addrec won't overflow,
  1769. // we don't need to do any further analysis.
  1770. if (AR->hasNoSignedWrap())
  1771. return getAddRecExpr(
  1772. getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
  1773. getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW);
      // Check whether the backedge-taken count is SCEVCouldNotCompute.
      // Note that this serves two purposes: It filters out loops that are
      // simply not analyzable, and it covers the case where this code is
      // being called from within backedge-taken count analysis, such that
      // attempting to ask for the backedge-taken count would likely result
      // in infinite recursion. In the latter case, the analysis code will
      // cope with a conservative value, and it will take care to purge
      // that value once it has finished.
  1782. const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
  1783. if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
  1784. // Manually compute the final value for AR, checking for
  1785. // overflow.
  1786. // Check whether the backedge-taken count can be losslessly casted to
  1787. // the addrec's type. The count is always unsigned.
  1788. const SCEV *CastedMaxBECount =
  1789. getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
  1790. const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
  1791. CastedMaxBECount, MaxBECount->getType(), Depth);
  1792. if (MaxBECount == RecastedMaxBECount) {
  1793. Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
  1794. // Check whether Start+Step*MaxBECount has no signed overflow.
  1795. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step,
  1796. SCEV::FlagAnyWrap, Depth + 1);
  1797. const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul,
  1798. SCEV::FlagAnyWrap,
  1799. Depth + 1),
  1800. WideTy, Depth + 1);
  1801. const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1);
  1802. const SCEV *WideMaxBECount =
  1803. getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
  1804. const SCEV *OperandExtendedAdd =
  1805. getAddExpr(WideStart,
  1806. getMulExpr(WideMaxBECount,
  1807. getSignExtendExpr(Step, WideTy, Depth + 1),
  1808. SCEV::FlagAnyWrap, Depth + 1),
  1809. SCEV::FlagAnyWrap, Depth + 1);
  1810. if (SAdd == OperandExtendedAdd) {
  1811. // Cache knowledge of AR NSW, which is propagated to this AddRec.
  1812. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
  1813. // Return the expression with the addrec on the outside.
  1814. return getAddRecExpr(
  1815. getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
  1816. Depth + 1),
  1817. getSignExtendExpr(Step, Ty, Depth + 1), L,
  1818. AR->getNoWrapFlags());
  1819. }
  1820. // Similar to above, only this time treat the step value as unsigned.
  1821. // This covers loops that count up with an unsigned step.
  1822. OperandExtendedAdd =
  1823. getAddExpr(WideStart,
  1824. getMulExpr(WideMaxBECount,
  1825. getZeroExtendExpr(Step, WideTy, Depth + 1),
  1826. SCEV::FlagAnyWrap, Depth + 1),
  1827. SCEV::FlagAnyWrap, Depth + 1);
  1828. if (SAdd == OperandExtendedAdd) {
  1829. // If AR wraps around then
  1830. //
  1831. // abs(Step) * MaxBECount > unsigned-max(AR->getType())
  1832. // => SAdd != OperandExtendedAdd
  1833. //
  1834. // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
  1835. // (SAdd == OperandExtendedAdd => AR is NW)
  1836. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
  1837. // Return the expression with the addrec on the outside.
  1838. return getAddRecExpr(
  1839. getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
  1840. Depth + 1),
  1841. getZeroExtendExpr(Step, Ty, Depth + 1), L,
  1842. AR->getNoWrapFlags());
  1843. }
  1844. }
  1845. }
  1846. auto NewFlags = proveNoSignedWrapViaInduction(AR);
  1847. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
  1848. if (AR->hasNoSignedWrap()) {
        // Same as the nsw case above - duplicated here to avoid a compile
        // time issue. It's not clear that the order of the checks matters,
        // but it is one of two possible causes of an issue with a change
        // that was reverted. Be conservative for the moment.
  1853. return getAddRecExpr(
  1854. getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
  1855. getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
  1856. }
      // sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw>
      // if D + (C - D + Step * n) can be proven not to wrap signed,
      // where D maximizes the number of trailing zeros of (C - D + Step * n).
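      // For instance (illustrative values): with Start = -3 and Step = 8,
      // D = 5, and sext({-3,+,8}) becomes (sext(5) + sext({-8,+,8}))<nuw><nsw>;
      // every value of {-8,+,8} is a multiple of 8, so adding 5 back cannot
      // wrap.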
  1860. if (const auto *SC = dyn_cast<SCEVConstant>(Start)) {
  1861. const APInt &C = SC->getAPInt();
  1862. const APInt &D = extractConstantWithoutWrapping(*this, C, Step);
  1863. if (D != 0) {
  1864. const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth);
  1865. const SCEV *SResidual =
  1866. getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags());
  1867. const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1);
  1868. return getAddExpr(SSExtD, SSExtR,
  1869. (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
  1870. Depth + 1);
  1871. }
  1872. }
  1873. if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
  1874. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
  1875. return getAddRecExpr(
  1876. getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
  1877. getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
  1878. }
  1879. }
  // If the input value is provably non-negative and we could not simplify
  // away the sext, build a zext instead.
  if (isKnownNonNegative(Op))
    return getZeroExtendExpr(Op, Ty, Depth + 1);
  1884. // The cast wasn't folded; create an explicit cast node.
  1885. // Recompute the insert position, as it may have been invalidated.
  1886. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  1887. SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
  1888. Op, Ty);
  1889. UniqueSCEVs.InsertNode(S, IP);
  1890. registerUser(S, { Op });
  1891. return S;
  1892. }
  1893. const SCEV *ScalarEvolution::getCastExpr(SCEVTypes Kind, const SCEV *Op,
  1894. Type *Ty) {
  1895. switch (Kind) {
  1896. case scTruncate:
  1897. return getTruncateExpr(Op, Ty);
  1898. case scZeroExtend:
  1899. return getZeroExtendExpr(Op, Ty);
  1900. case scSignExtend:
  1901. return getSignExtendExpr(Op, Ty);
  1902. case scPtrToInt:
  1903. return getPtrToIntExpr(Op, Ty);
  1904. default:
  1905. llvm_unreachable("Not a SCEV cast expression!");
  1906. }
  1907. }
  1908. /// getAnyExtendExpr - Return a SCEV for the given operand extended with
  1909. /// unspecified bits out to the given type.
  1910. const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
  1911. Type *Ty) {
  1912. assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
  1913. "This is not an extending conversion!");
  1914. assert(isSCEVable(Ty) &&
  1915. "This is not a conversion to a SCEVable type!");
  1916. Ty = getEffectiveSCEVType(Ty);
  1917. // Sign-extend negative constants.
  1918. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
  1919. if (SC->getAPInt().isNegative())
  1920. return getSignExtendExpr(Op, Ty);
  1921. // Peel off a truncate cast.
  1922. if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
  1923. const SCEV *NewOp = T->getOperand();
  1924. if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
  1925. return getAnyExtendExpr(NewOp, Ty);
  1926. return getTruncateOrNoop(NewOp, Ty);
  1927. }
  1928. // Next try a zext cast. If the cast is folded, use it.
  1929. const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
  1930. if (!isa<SCEVZeroExtendExpr>(ZExt))
  1931. return ZExt;
  1932. // Next try a sext cast. If the cast is folded, use it.
  1933. const SCEV *SExt = getSignExtendExpr(Op, Ty);
  1934. if (!isa<SCEVSignExtendExpr>(SExt))
  1935. return SExt;
  1936. // Force the cast to be folded into the operands of an addrec.
  1937. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
  1938. SmallVector<const SCEV *, 4> Ops;
  1939. for (const SCEV *Op : AR->operands())
  1940. Ops.push_back(getAnyExtendExpr(Op, Ty));
  1941. return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
  1942. }
  1943. // If the expression is obviously signed, use the sext cast value.
  1944. if (isa<SCEVSMaxExpr>(Op))
  1945. return SExt;
  1946. // Absent any other information, use the zext cast value.
  1947. return ZExt;
  1948. }
  1949. /// Process the given Ops list, which is a list of operands to be added under
  1950. /// the given scale, update the given map. This is a helper function for
  1951. /// getAddRecExpr. As an example of what it does, given a sequence of operands
  1952. /// that would form an add expression like this:
  1953. ///
  1954. /// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
  1955. ///
  1956. /// where A and B are constants, update the map with these values:
  1957. ///
  1958. /// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
  1959. ///
  1960. /// and add 13 + A*B*29 to AccumulatedConstant.
  1961. /// This will allow getAddRecExpr to produce this:
  1962. ///
  1963. /// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
  1964. ///
  1965. /// This form often exposes folding opportunities that are hidden in
  1966. /// the original operand list.
  1967. ///
  1968. /// Return true iff it appears that any interesting folding opportunities
  1969. /// may be exposed. This helps getAddRecExpr short-circuit extra work in
  1970. /// the common case where no interesting opportunities are present, and
  1971. /// is also used as a check to avoid infinite recursion.
  1972. static bool
  1973. CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
  1974. SmallVectorImpl<const SCEV *> &NewOps,
  1975. APInt &AccumulatedConstant,
  1976. const SCEV *const *Ops, size_t NumOperands,
  1977. const APInt &Scale,
  1978. ScalarEvolution &SE) {
  1979. bool Interesting = false;
  1980. // Iterate over the add operands. They are sorted, with constants first.
  1981. unsigned i = 0;
  1982. while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
  1983. ++i;
  1984. // Pull a buried constant out to the outside.
  1985. if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
  1986. Interesting = true;
  1987. AccumulatedConstant += Scale * C->getAPInt();
  1988. }
  1989. // Next comes everything else. We're especially interested in multiplies
  1990. // here, but they're in the middle, so just visit the rest with one loop.
  1991. for (; i != NumOperands; ++i) {
  1992. const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
  1993. if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
  1994. APInt NewScale =
  1995. Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt();
  1996. if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
  1997. // A multiplication of a constant with another add; recurse.
  1998. const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
  1999. Interesting |=
  2000. CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
  2001. Add->op_begin(), Add->getNumOperands(),
  2002. NewScale, SE);
  2003. } else {
  2004. // A multiplication of a constant with some other value. Update
  2005. // the map.
  2006. SmallVector<const SCEV *, 4> MulOps(drop_begin(Mul->operands()));
  2007. const SCEV *Key = SE.getMulExpr(MulOps);
  2008. auto Pair = M.insert({Key, NewScale});
  2009. if (Pair.second) {
  2010. NewOps.push_back(Pair.first->first);
  2011. } else {
  2012. Pair.first->second += NewScale;
  2013. // The map already had an entry for this value, which may indicate
  2014. // a folding opportunity.
  2015. Interesting = true;
  2016. }
  2017. }
  2018. } else {
  2019. // An ordinary operand. Update the map.
  2020. std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
  2021. M.insert({Ops[i], Scale});
  2022. if (Pair.second) {
  2023. NewOps.push_back(Pair.first->first);
  2024. } else {
  2025. Pair.first->second += Scale;
  2026. // The map already had an entry for this value, which may indicate
  2027. // a folding opportunity.
  2028. Interesting = true;
  2029. }
  2030. }
  2031. }
  2032. return Interesting;
  2033. }
  2034. bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
  2035. const SCEV *LHS, const SCEV *RHS) {
  2036. const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
  2037. SCEV::NoWrapFlags, unsigned);
  2038. switch (BinOp) {
  2039. default:
  2040. llvm_unreachable("Unsupported binary op");
  2041. case Instruction::Add:
  2042. Operation = &ScalarEvolution::getAddExpr;
  2043. break;
  2044. case Instruction::Sub:
  2045. Operation = &ScalarEvolution::getMinusSCEV;
  2046. break;
  2047. case Instruction::Mul:
  2048. Operation = &ScalarEvolution::getMulExpr;
  2049. break;
  2050. }
  2051. const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) =
  2052. Signed ? &ScalarEvolution::getSignExtendExpr
  2053. : &ScalarEvolution::getZeroExtendExpr;
  2054. // Check ext(LHS op RHS) == ext(LHS) op ext(RHS)
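  // For illustration (hypothetical ranges): for an i8 add whose operands are
  // both known to lie in [0, 100], extending to i16 before and after the add
  // can produce the same SCEV; when it does, the narrow add provably cannot
  // overflow.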
  2055. auto *NarrowTy = cast<IntegerType>(LHS->getType());
  2056. auto *WideTy =
  2057. IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
  2058. const SCEV *A = (this->*Extension)(
  2059. (this->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), WideTy, 0);
  2060. const SCEV *B = (this->*Operation)((this->*Extension)(LHS, WideTy, 0),
  2061. (this->*Extension)(RHS, WideTy, 0),
  2062. SCEV::FlagAnyWrap, 0);
  2063. return A == B;
  2064. }
  2065. std::pair<SCEV::NoWrapFlags, bool /*Deduced*/>
  2066. ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp(
  2067. const OverflowingBinaryOperator *OBO) {
  2068. SCEV::NoWrapFlags Flags = SCEV::NoWrapFlags::FlagAnyWrap;
  2069. if (OBO->hasNoUnsignedWrap())
  2070. Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
  2071. if (OBO->hasNoSignedWrap())
  2072. Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
  2073. bool Deduced = false;
  2074. if (OBO->hasNoUnsignedWrap() && OBO->hasNoSignedWrap())
  2075. return {Flags, Deduced};
  2076. if (OBO->getOpcode() != Instruction::Add &&
  2077. OBO->getOpcode() != Instruction::Sub &&
  2078. OBO->getOpcode() != Instruction::Mul)
  2079. return {Flags, Deduced};
  2080. const SCEV *LHS = getSCEV(OBO->getOperand(0));
  2081. const SCEV *RHS = getSCEV(OBO->getOperand(1));
  2082. if (!OBO->hasNoUnsignedWrap() &&
  2083. willNotOverflow((Instruction::BinaryOps)OBO->getOpcode(),
  2084. /* Signed */ false, LHS, RHS)) {
  2085. Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
  2086. Deduced = true;
  2087. }
  2088. if (!OBO->hasNoSignedWrap() &&
  2089. willNotOverflow((Instruction::BinaryOps)OBO->getOpcode(),
  2090. /* Signed */ true, LHS, RHS)) {
  2091. Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
  2092. Deduced = true;
  2093. }
  2094. return {Flags, Deduced};
  2095. }
  2096. // We're trying to construct a SCEV of type `Type' with `Ops' as operands and
  2097. // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of
  2098. // can't-overflow flags for the operation if possible.
  2099. static SCEV::NoWrapFlags
  2100. StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
  2101. const ArrayRef<const SCEV *> Ops,
  2102. SCEV::NoWrapFlags Flags) {
  2103. using namespace std::placeholders;
  2104. using OBO = OverflowingBinaryOperator;
  2105. bool CanAnalyze =
  2106. Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
  2107. (void)CanAnalyze;
  2108. assert(CanAnalyze && "don't call from other places!");
  2109. int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
  2110. SCEV::NoWrapFlags SignOrUnsignWrap =
  2111. ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
  2112. // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
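  // (For instance, an <nsw> add of operands that are all known non-negative
  // has a true sum of at most SINT_MAX, which also fits in the unsigned range,
  // so <nuw> holds as well.)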
  2113. auto IsKnownNonNegative = [&](const SCEV *S) {
  2114. return SE->isKnownNonNegative(S);
  2115. };
  2116. if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative))
  2117. Flags =
  2118. ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
  2119. SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
  2120. if (SignOrUnsignWrap != SignOrUnsignMask &&
  2121. (Type == scAddExpr || Type == scMulExpr) && Ops.size() == 2 &&
  2122. isa<SCEVConstant>(Ops[0])) {
  2123. auto Opcode = [&] {
  2124. switch (Type) {
  2125. case scAddExpr:
  2126. return Instruction::Add;
  2127. case scMulExpr:
  2128. return Instruction::Mul;
  2129. default:
  2130. llvm_unreachable("Unexpected SCEV op.");
  2131. }
  2132. }();
  2133. const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();
  2134. // (A <opcode> C) --> (A <opcode> C)<nsw> if the op doesn't sign overflow.
  2135. if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
  2136. auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
  2137. Opcode, C, OBO::NoSignedWrap);
  2138. if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
  2139. Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
  2140. }
  2141. // (A <opcode> C) --> (A <opcode> C)<nuw> if the op doesn't unsign overflow.
  2142. if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
  2143. auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
  2144. Opcode, C, OBO::NoUnsignedWrap);
  2145. if (NUWRegion.contains(SE->getUnsignedRange(Ops[1])))
  2146. Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
  2147. }
  2148. }
  2149. // <0,+,nonnegative><nw> is also nuw
  2150. // TODO: Add corresponding nsw case
  2151. if (Type == scAddRecExpr && ScalarEvolution::hasFlags(Flags, SCEV::FlagNW) &&
  2152. !ScalarEvolution::hasFlags(Flags, SCEV::FlagNUW) && Ops.size() == 2 &&
  2153. Ops[0]->isZero() && IsKnownNonNegative(Ops[1]))
  2154. Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
  2155. // both (udiv X, Y) * Y and Y * (udiv X, Y) are always NUW
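  // (This is because (X udiv Y) * Y never exceeds X, so the product stays
  // within the unsigned range of the type.)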
  2156. if (Type == scMulExpr && !ScalarEvolution::hasFlags(Flags, SCEV::FlagNUW) &&
  2157. Ops.size() == 2) {
  2158. if (auto *UDiv = dyn_cast<SCEVUDivExpr>(Ops[0]))
  2159. if (UDiv->getOperand(1) == Ops[1])
  2160. Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
  2161. if (auto *UDiv = dyn_cast<SCEVUDivExpr>(Ops[1]))
  2162. if (UDiv->getOperand(1) == Ops[0])
  2163. Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
  2164. }
  2165. return Flags;
  2166. }
  2167. bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) {
  2168. return isLoopInvariant(S, L) && properlyDominates(S, L->getHeader());
  2169. }
  2170. /// Get a canonical add expression, or something simpler if possible.
  2171. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
  2172. SCEV::NoWrapFlags OrigFlags,
  2173. unsigned Depth) {
  2174. assert(!(OrigFlags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
  2175. "only nuw or nsw allowed");
  2176. assert(!Ops.empty() && "Cannot get empty add!");
  2177. if (Ops.size() == 1) return Ops[0];
  2178. #ifndef NDEBUG
  2179. Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  2180. for (unsigned i = 1, e = Ops.size(); i != e; ++i)
  2181. assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
  2182. "SCEVAddExpr operand types don't match!");
  2183. unsigned NumPtrs = count_if(
  2184. Ops, [](const SCEV *Op) { return Op->getType()->isPointerTy(); });
  2185. assert(NumPtrs <= 1 && "add has at most one pointer operand");
  2186. #endif
  2187. // Sort by complexity, this groups all similar expression types together.
  2188. GroupByComplexity(Ops, &LI, DT);
  2189. // If there are any constants, fold them together.
  2190. unsigned Idx = 0;
  2191. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
  2192. ++Idx;
  2193. assert(Idx < Ops.size());
  2194. while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
  2195. // We found two constants, fold them together!
  2196. Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());
  2197. if (Ops.size() == 2) return Ops[0];
  2198. Ops.erase(Ops.begin()+1); // Erase the folded element
  2199. LHSC = cast<SCEVConstant>(Ops[0]);
  2200. }
  2201. // If we are left with a constant zero being added, strip it off.
  2202. if (LHSC->getValue()->isZero()) {
  2203. Ops.erase(Ops.begin());
  2204. --Idx;
  2205. }
  2206. if (Ops.size() == 1) return Ops[0];
  2207. }
  2208. // Delay expensive flag strengthening until necessary.
  2209. auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) {
  2210. return StrengthenNoWrapFlags(this, scAddExpr, Ops, OrigFlags);
  2211. };
  2212. // Limit recursion calls depth.
  2213. if (Depth > MaxArithDepth || hasHugeExpression(Ops))
  2214. return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
  2215. if (SCEV *S = findExistingSCEVInCache(scAddExpr, Ops)) {
  2216. // Don't strengthen flags if we have no new information.
  2217. SCEVAddExpr *Add = static_cast<SCEVAddExpr *>(S);
  2218. if (Add->getNoWrapFlags(OrigFlags) != OrigFlags)
  2219. Add->setNoWrapFlags(ComputeFlags(Ops));
  2220. return S;
  2221. }
  // Okay, check to see if the same value occurs in the operand list more than
  // once. If so, merge them together into a multiply expression. Since we
  // sorted the list, these values are required to be adjacent.
  2225. Type *Ty = Ops[0]->getType();
  2226. bool FoundMatch = false;
  2227. for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
  2228. if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
  2229. // Scan ahead to count how many equal operands there are.
  2230. unsigned Count = 2;
  2231. while (i+Count != e && Ops[i+Count] == Ops[i])
  2232. ++Count;
  2233. // Merge the values into a multiply.
  2234. const SCEV *Scale = getConstant(Ty, Count);
  2235. const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1);
  2236. if (Ops.size() == Count)
  2237. return Mul;
  2238. Ops[i] = Mul;
  2239. Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
  2240. --i; e -= Count - 1;
  2241. FoundMatch = true;
  2242. }
  2243. if (FoundMatch)
  2244. return getAddExpr(Ops, OrigFlags, Depth + 1);
  // Check for truncates. If all the operands are truncated from the same
  // type, see if factoring out the truncate would permit the result to be
  // folded. e.g., n*trunc(x) + m*trunc(y) --> trunc(trunc(n)*x + trunc(m)*y)
  // if the contents of the resulting outer trunc fold to something simple.
  2249. auto FindTruncSrcType = [&]() -> Type * {
    // We're ultimately looking to fold an add of truncs and muls of only
    // constants and truncs, so if we find any other types of SCEV
    // as operands of the add expression then we bail and return nullptr here.
    // Otherwise, we return the type of the operand of a trunc that we find.
  2254. if (auto *T = dyn_cast<SCEVTruncateExpr>(Ops[Idx]))
  2255. return T->getOperand()->getType();
  2256. if (const auto *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
  2257. const auto *LastOp = Mul->getOperand(Mul->getNumOperands() - 1);
  2258. if (const auto *T = dyn_cast<SCEVTruncateExpr>(LastOp))
  2259. return T->getOperand()->getType();
  2260. }
  2261. return nullptr;
  2262. };
  2263. if (auto *SrcType = FindTruncSrcType()) {
  2264. SmallVector<const SCEV *, 8> LargeOps;
  2265. bool Ok = true;
  2266. // Check all the operands to see if they can be represented in the
  2267. // source type of the truncate.
  2268. for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
  2269. if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
  2270. if (T->getOperand()->getType() != SrcType) {
  2271. Ok = false;
  2272. break;
  2273. }
  2274. LargeOps.push_back(T->getOperand());
  2275. } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
  2276. LargeOps.push_back(getAnyExtendExpr(C, SrcType));
  2277. } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
  2278. SmallVector<const SCEV *, 8> LargeMulOps;
  2279. for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
  2280. if (const SCEVTruncateExpr *T =
  2281. dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
  2282. if (T->getOperand()->getType() != SrcType) {
  2283. Ok = false;
  2284. break;
  2285. }
  2286. LargeMulOps.push_back(T->getOperand());
  2287. } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
  2288. LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
  2289. } else {
  2290. Ok = false;
  2291. break;
  2292. }
  2293. }
  2294. if (Ok)
  2295. LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1));
  2296. } else {
  2297. Ok = false;
  2298. break;
  2299. }
  2300. }
  2301. if (Ok) {
  2302. // Evaluate the expression in the larger type.
  2303. const SCEV *Fold = getAddExpr(LargeOps, SCEV::FlagAnyWrap, Depth + 1);
  2304. // If it folds to something simple, use it. Otherwise, don't.
  2305. if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
  2306. return getTruncateExpr(Fold, Ty);
  2307. }
  2308. }
  2309. if (Ops.size() == 2) {
  2310. // Check if we have an expression of the form ((X + C1) - C2), where C1 and
  2311. // C2 can be folded in a way that allows retaining wrapping flags of (X +
  2312. // C1).
  2313. const SCEV *A = Ops[0];
  2314. const SCEV *B = Ops[1];
  2315. auto *AddExpr = dyn_cast<SCEVAddExpr>(B);
  2316. auto *C = dyn_cast<SCEVConstant>(A);
  2317. if (AddExpr && C && isa<SCEVConstant>(AddExpr->getOperand(0))) {
  2318. auto C1 = cast<SCEVConstant>(AddExpr->getOperand(0))->getAPInt();
  2319. auto C2 = C->getAPInt();
  2320. SCEV::NoWrapFlags PreservedFlags = SCEV::FlagAnyWrap;
  2321. APInt ConstAdd = C1 + C2;
  2322. auto AddFlags = AddExpr->getNoWrapFlags();
  2323. // Adding a smaller constant is NUW if the original AddExpr was NUW.
  2324. if (ScalarEvolution::hasFlags(AddFlags, SCEV::FlagNUW) &&
  2325. ConstAdd.ule(C1)) {
  2326. PreservedFlags =
  2327. ScalarEvolution::setFlags(PreservedFlags, SCEV::FlagNUW);
  2328. }
      // Adding a constant with the same sign and no larger magnitude is NSW
      // if the original AddExpr was NSW.
  2331. if (ScalarEvolution::hasFlags(AddFlags, SCEV::FlagNSW) &&
  2332. C1.isSignBitSet() == ConstAdd.isSignBitSet() &&
  2333. ConstAdd.abs().ule(C1.abs())) {
  2334. PreservedFlags =
  2335. ScalarEvolution::setFlags(PreservedFlags, SCEV::FlagNSW);
  2336. }
  2337. if (PreservedFlags != SCEV::FlagAnyWrap) {
  2338. SmallVector<const SCEV *, 4> NewOps(AddExpr->operands());
  2339. NewOps[0] = getConstant(ConstAdd);
  2340. return getAddExpr(NewOps, PreservedFlags);
  2341. }
  2342. }
  2343. }
  // Canonicalize (-1 * (X urem Y)) + X --> Y * (X udiv Y)
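  // (This holds because X - (X urem Y) is, by the definition of urem, exactly
  // (X udiv Y) * Y.)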
  2345. if (Ops.size() == 2) {
  2346. const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[0]);
  2347. if (Mul && Mul->getNumOperands() == 2 &&
  2348. Mul->getOperand(0)->isAllOnesValue()) {
  2349. const SCEV *X;
  2350. const SCEV *Y;
  2351. if (matchURem(Mul->getOperand(1), X, Y) && X == Ops[1]) {
  2352. return getMulExpr(Y, getUDivExpr(X, Y));
  2353. }
  2354. }
  2355. }
  2356. // Skip past any other cast SCEVs.
  2357. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
  2358. ++Idx;
  2359. // If there are add operands they would be next.
  2360. if (Idx < Ops.size()) {
  2361. bool DeletedAdd = false;
  2362. // If the original flags and all inlined SCEVAddExprs are NUW, use the
  2363. // common NUW flag for expression after inlining. Other flags cannot be
  2364. // preserved, because they may depend on the original order of operations.
  2365. SCEV::NoWrapFlags CommonFlags = maskFlags(OrigFlags, SCEV::FlagNUW);
  2366. while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
  2367. if (Ops.size() > AddOpsInlineThreshold ||
  2368. Add->getNumOperands() > AddOpsInlineThreshold)
  2369. break;
  2370. // If we have an add, expand the add operands onto the end of the operands
  2371. // list.
  2372. Ops.erase(Ops.begin()+Idx);
  2373. Ops.append(Add->op_begin(), Add->op_end());
  2374. DeletedAdd = true;
  2375. CommonFlags = maskFlags(CommonFlags, Add->getNoWrapFlags());
  2376. }
  2377. // If we deleted at least one add, we added operands to the end of the list,
  2378. // and they are not necessarily sorted. Recurse to resort and resimplify
  2379. // any operands we just acquired.
  2380. if (DeletedAdd)
  2381. return getAddExpr(Ops, CommonFlags, Depth + 1);
  2382. }
  2383. // Skip over the add expression until we get to a multiply.
  2384. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
  2385. ++Idx;
  2386. // Check to see if there are any folding opportunities present with
  2387. // operands multiplied by constant values.
  2388. if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
  2389. uint64_t BitWidth = getTypeSizeInBits(Ty);
  2390. DenseMap<const SCEV *, APInt> M;
  2391. SmallVector<const SCEV *, 8> NewOps;
  2392. APInt AccumulatedConstant(BitWidth, 0);
  2393. if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
  2394. Ops.data(), Ops.size(),
  2395. APInt(BitWidth, 1), *this)) {
  2396. struct APIntCompare {
  2397. bool operator()(const APInt &LHS, const APInt &RHS) const {
  2398. return LHS.ult(RHS);
  2399. }
  2400. };
// Some interesting folding opportunity is present, so it's worthwhile to
  2402. // re-generate the operands list. Group the operands by constant scale,
  2403. // to avoid multiplying by the same constant scale multiple times.
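// For example, (2*X) + (4*Y) + (2*Z) is regrouped as 2*(X + Z) + 4*Y, so the
// scale 2 is applied once instead of twice.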
  2404. std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
  2405. for (const SCEV *NewOp : NewOps)
  2406. MulOpLists[M.find(NewOp)->second].push_back(NewOp);
  2407. // Re-generate the operands list.
  2408. Ops.clear();
  2409. if (AccumulatedConstant != 0)
  2410. Ops.push_back(getConstant(AccumulatedConstant));
  2411. for (auto &MulOp : MulOpLists) {
  2412. if (MulOp.first == 1) {
  2413. Ops.push_back(getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1));
  2414. } else if (MulOp.first != 0) {
  2415. Ops.push_back(getMulExpr(
  2416. getConstant(MulOp.first),
  2417. getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1),
  2418. SCEV::FlagAnyWrap, Depth + 1));
  2419. }
  2420. }
  2421. if (Ops.empty())
  2422. return getZero(Ty);
  2423. if (Ops.size() == 1)
  2424. return Ops[0];
  2425. return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
  2426. }
  2427. }
  2428. // If we are adding something to a multiply expression, make sure the
  2429. // something is not already an operand of the multiply. If so, merge it into
  2430. // the multiply.
  2431. for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
  2432. const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
  2433. for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
  2434. const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
  2435. if (isa<SCEVConstant>(MulOpSCEV))
  2436. continue;
  2437. for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
  2438. if (MulOpSCEV == Ops[AddOp]) {
  2439. // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
  2440. const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
  2441. if (Mul->getNumOperands() != 2) {
  2442. // If the multiply has more than two operands, we must get the
  2443. // Y*Z term.
  2444. SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
  2445. Mul->op_begin()+MulOp);
  2446. MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
  2447. InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
  2448. }
  2449. SmallVector<const SCEV *, 2> TwoOps = {getOne(Ty), InnerMul};
  2450. const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
  2451. const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV,
  2452. SCEV::FlagAnyWrap, Depth + 1);
  2453. if (Ops.size() == 2) return OuterMul;
  2454. if (AddOp < Idx) {
  2455. Ops.erase(Ops.begin()+AddOp);
  2456. Ops.erase(Ops.begin()+Idx-1);
  2457. } else {
  2458. Ops.erase(Ops.begin()+Idx);
  2459. Ops.erase(Ops.begin()+AddOp-1);
  2460. }
  2461. Ops.push_back(OuterMul);
  2462. return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
  2463. }
  2464. // Check this multiply against other multiplies being added together.
  2465. for (unsigned OtherMulIdx = Idx+1;
  2466. OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
  2467. ++OtherMulIdx) {
  2468. const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
  2469. // If MulOp occurs in OtherMul, we can fold the two multiplies
  2470. // together.
  2471. for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
  2472. OMulOp != e; ++OMulOp)
  2473. if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
  2474. // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
  2475. const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
  2476. if (Mul->getNumOperands() != 2) {
  2477. SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
  2478. Mul->op_begin()+MulOp);
  2479. MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
  2480. InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
  2481. }
  2482. const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
  2483. if (OtherMul->getNumOperands() != 2) {
  2484. SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
  2485. OtherMul->op_begin()+OMulOp);
  2486. MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
  2487. InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
  2488. }
  2489. SmallVector<const SCEV *, 2> TwoOps = {InnerMul1, InnerMul2};
  2490. const SCEV *InnerMulSum =
  2491. getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
  2492. const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum,
  2493. SCEV::FlagAnyWrap, Depth + 1);
  2494. if (Ops.size() == 2) return OuterMul;
  2495. Ops.erase(Ops.begin()+Idx);
  2496. Ops.erase(Ops.begin()+OtherMulIdx-1);
  2497. Ops.push_back(OuterMul);
  2498. return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
  2499. }
  2500. }
  2501. }
  2502. }
  2503. // If there are any add recurrences in the operands list, see if any other
  2504. // added values are loop invariant. If so, we can fold them into the
  2505. // recurrence.
  2506. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
  2507. ++Idx;
  2508. // Scan over all recurrences, trying to fold loop invariants into them.
  2509. for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
  2510. // Scan all of the other operands to this add and add them to the vector if
  2511. // they are loop invariant w.r.t. the recurrence.
  2512. SmallVector<const SCEV *, 8> LIOps;
  2513. const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
  2514. const Loop *AddRecLoop = AddRec->getLoop();
  2515. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  2516. if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
  2517. LIOps.push_back(Ops[i]);
  2518. Ops.erase(Ops.begin()+i);
  2519. --i; --e;
  2520. }
  2521. // If we found some loop invariants, fold them into the recurrence.
  2522. if (!LIOps.empty()) {
  2523. // Compute nowrap flags for the addition of the loop-invariant ops and
  2524. // the addrec. Temporarily push it as an operand for that purpose. These
  2525. // flags are valid in the scope of the addrec only.
  2526. LIOps.push_back(AddRec);
  2527. SCEV::NoWrapFlags Flags = ComputeFlags(LIOps);
  2528. LIOps.pop_back();
  2529. // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
  2530. LIOps.push_back(AddRec->getStart());
  2531. SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
  2532. // It is not in general safe to propagate flags valid on an add within
  2533. // the addrec scope to one outside it. We must prove that the inner
  2534. // scope is guaranteed to execute if the outer one does to be able to
  2535. // safely propagate. We know the program is undefined if poison is
  2536. // produced on the inner scoped addrec. We also know that *for this use*
  2537. // the outer scoped add can't overflow (because of the flags we just
  2538. // computed for the inner scoped add) without the program being undefined.
// Proving that entry to the outer scope necessitates entry to the inner
  2540. // scope, thus proves the program undefined if the flags would be violated
  2541. // in the outer scope.
  2542. SCEV::NoWrapFlags AddFlags = Flags;
  2543. if (AddFlags != SCEV::FlagAnyWrap) {
  2544. auto *DefI = getDefiningScopeBound(LIOps);
  2545. auto *ReachI = &*AddRecLoop->getHeader()->begin();
  2546. if (!isGuaranteedToTransferExecutionTo(DefI, ReachI))
  2547. AddFlags = SCEV::FlagAnyWrap;
  2548. }
  2549. AddRecOps[0] = getAddExpr(LIOps, AddFlags, Depth + 1);
  2550. // Build the new addrec. Propagate the NUW and NSW flags if both the
  2551. // outer add and the inner addrec are guaranteed to have no overflow.
  2552. // Always propagate NW.
  2553. Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
  2554. const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
  2555. // If all of the other operands were loop invariant, we are done.
  2556. if (Ops.size() == 1) return NewRec;
  2557. // Otherwise, add the folded AddRec by the non-invariant parts.
  2558. for (unsigned i = 0;; ++i)
  2559. if (Ops[i] == AddRec) {
  2560. Ops[i] = NewRec;
  2561. break;
  2562. }
  2563. return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
  2564. }
  2565. // Okay, if there weren't any loop invariants to be folded, check to see if
// there are multiple AddRecs with the same loop induction variable being
  2567. // added together. If so, we can fold them.
  2568. for (unsigned OtherIdx = Idx+1;
  2569. OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
  2570. ++OtherIdx) {
// We expect the AddRecExprs to be sorted in reverse dominance order,
  2572. // so that the 1st found AddRecExpr is dominated by all others.
  2573. assert(DT.dominates(
  2574. cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(),
  2575. AddRec->getLoop()->getHeader()) &&
  2576. "AddRecExprs are not sorted in reverse dominance order?");
  2577. if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
  2578. // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
  2579. SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
  2580. for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
  2581. ++OtherIdx) {
  2582. const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
  2583. if (OtherAddRec->getLoop() == AddRecLoop) {
  2584. for (unsigned i = 0, e = OtherAddRec->getNumOperands();
  2585. i != e; ++i) {
  2586. if (i >= AddRecOps.size()) {
  2587. AddRecOps.append(OtherAddRec->op_begin()+i,
  2588. OtherAddRec->op_end());
  2589. break;
  2590. }
  2591. SmallVector<const SCEV *, 2> TwoOps = {
  2592. AddRecOps[i], OtherAddRec->getOperand(i)};
  2593. AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
  2594. }
  2595. Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
  2596. }
  2597. }
  2598. // Step size has changed, so we cannot guarantee no self-wraparound.
  2599. Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
  2600. return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
  2601. }
  2602. }
// Otherwise couldn't fold anything into this recurrence. Move on to the
  2604. // next one.
  2605. }
  2606. // Okay, it looks like we really DO need an add expr. Check to see if we
  2607. // already have one, otherwise create a new one.
  2608. return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
  2609. }
  2610. const SCEV *
  2611. ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
  2612. SCEV::NoWrapFlags Flags) {
  2613. FoldingSetNodeID ID;
  2614. ID.AddInteger(scAddExpr);
  2615. for (const SCEV *Op : Ops)
  2616. ID.AddPointer(Op);
  2617. void *IP = nullptr;
  2618. SCEVAddExpr *S =
  2619. static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  2620. if (!S) {
  2621. const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  2622. std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  2623. S = new (SCEVAllocator)
  2624. SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
  2625. UniqueSCEVs.InsertNode(S, IP);
  2626. registerUser(S, Ops);
  2627. }
  2628. S->setNoWrapFlags(Flags);
  2629. return S;
  2630. }
  2631. const SCEV *
  2632. ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
  2633. const Loop *L, SCEV::NoWrapFlags Flags) {
  2634. FoldingSetNodeID ID;
  2635. ID.AddInteger(scAddRecExpr);
  2636. for (const SCEV *Op : Ops)
  2637. ID.AddPointer(Op);
  2638. ID.AddPointer(L);
  2639. void *IP = nullptr;
  2640. SCEVAddRecExpr *S =
  2641. static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  2642. if (!S) {
  2643. const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  2644. std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  2645. S = new (SCEVAllocator)
  2646. SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L);
  2647. UniqueSCEVs.InsertNode(S, IP);
  2648. LoopUsers[L].push_back(S);
  2649. registerUser(S, Ops);
  2650. }
  2651. setNoWrapFlags(S, Flags);
  2652. return S;
  2653. }
  2654. const SCEV *
  2655. ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
  2656. SCEV::NoWrapFlags Flags) {
  2657. FoldingSetNodeID ID;
  2658. ID.AddInteger(scMulExpr);
  2659. for (const SCEV *Op : Ops)
  2660. ID.AddPointer(Op);
  2661. void *IP = nullptr;
  2662. SCEVMulExpr *S =
  2663. static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
  2664. if (!S) {
  2665. const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  2666. std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  2667. S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
  2668. O, Ops.size());
  2669. UniqueSCEVs.InsertNode(S, IP);
  2670. registerUser(S, Ops);
  2671. }
  2672. S->setNoWrapFlags(Flags);
  2673. return S;
  2674. }
  2675. static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
  2676. uint64_t k = i*j;
  2677. if (j > 1 && k / j != i) Overflow = true;
  2678. return k;
  2679. }
  2680. /// Compute the result of "n choose k", the binomial coefficient. If an
  2681. /// intermediate computation overflows, Overflow will be set and the return will
  2682. /// be garbage. Overflow is not cleared on absence of overflow.
  2683. static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
  2684. // We use the multiplicative formula:
  2685. // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
// At each iteration, we take the n-th term of the numerator and divide by the
  2687. // (k-n)th term of the denominator. This division will always produce an
  2688. // integral result, and helps reduce the chance of overflow in the
  2689. // intermediate computations. However, we can still overflow even when the
  2690. // final result would fit.
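// For example, Choose(5, 2) runs two iterations: r = 1*5/1 = 5, then
// r = 5*4/2 = 10, which is the expected binomial coefficient.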
  2691. if (n == 0 || n == k) return 1;
  2692. if (k > n) return 0;
  2693. if (k > n/2)
  2694. k = n-k;
  2695. uint64_t r = 1;
  2696. for (uint64_t i = 1; i <= k; ++i) {
  2697. r = umul_ov(r, n-(i-1), Overflow);
  2698. r /= i;
  2699. }
  2700. return r;
  2701. }
  2702. /// Determine if any of the operands in this SCEV are a constant or if
  2703. /// any of the add or multiply expressions in this SCEV contain a constant.
  2704. static bool containsConstantInAddMulChain(const SCEV *StartExpr) {
  2705. struct FindConstantInAddMulChain {
  2706. bool FoundConstant = false;
  2707. bool follow(const SCEV *S) {
  2708. FoundConstant |= isa<SCEVConstant>(S);
  2709. return isa<SCEVAddExpr>(S) || isa<SCEVMulExpr>(S);
  2710. }
  2711. bool isDone() const {
  2712. return FoundConstant;
  2713. }
  2714. };
  2715. FindConstantInAddMulChain F;
  2716. SCEVTraversal<FindConstantInAddMulChain> ST(F);
  2717. ST.visitAll(StartExpr);
  2718. return F.FoundConstant;
  2719. }
  2720. /// Get a canonical multiply expression, or something simpler if possible.
  2721. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
  2722. SCEV::NoWrapFlags OrigFlags,
  2723. unsigned Depth) {
  2724. assert(OrigFlags == maskFlags(OrigFlags, SCEV::FlagNUW | SCEV::FlagNSW) &&
  2725. "only nuw or nsw allowed");
  2726. assert(!Ops.empty() && "Cannot get empty mul!");
  2727. if (Ops.size() == 1) return Ops[0];
  2728. #ifndef NDEBUG
  2729. Type *ETy = Ops[0]->getType();
  2730. assert(!ETy->isPointerTy());
  2731. for (unsigned i = 1, e = Ops.size(); i != e; ++i)
  2732. assert(Ops[i]->getType() == ETy &&
  2733. "SCEVMulExpr operand types don't match!");
  2734. #endif
// Sort by complexity; this groups all similar expression types together.
  2736. GroupByComplexity(Ops, &LI, DT);
  2737. // If there are any constants, fold them together.
  2738. unsigned Idx = 0;
  2739. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
  2740. ++Idx;
  2741. assert(Idx < Ops.size());
  2742. while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
  2743. // We found two constants, fold them together!
  2744. Ops[0] = getConstant(LHSC->getAPInt() * RHSC->getAPInt());
  2745. if (Ops.size() == 2) return Ops[0];
  2746. Ops.erase(Ops.begin()+1); // Erase the folded element
  2747. LHSC = cast<SCEVConstant>(Ops[0]);
  2748. }
  2749. // If we have a multiply of zero, it will always be zero.
  2750. if (LHSC->getValue()->isZero())
  2751. return LHSC;
  2752. // If we are left with a constant one being multiplied, strip it off.
  2753. if (LHSC->getValue()->isOne()) {
  2754. Ops.erase(Ops.begin());
  2755. --Idx;
  2756. }
  2757. if (Ops.size() == 1)
  2758. return Ops[0];
  2759. }
  2760. // Delay expensive flag strengthening until necessary.
  2761. auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) {
  2762. return StrengthenNoWrapFlags(this, scMulExpr, Ops, OrigFlags);
  2763. };
// Limit recursion call depth.
  2765. if (Depth > MaxArithDepth || hasHugeExpression(Ops))
  2766. return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
  2767. if (SCEV *S = findExistingSCEVInCache(scMulExpr, Ops)) {
  2768. // Don't strengthen flags if we have no new information.
  2769. SCEVMulExpr *Mul = static_cast<SCEVMulExpr *>(S);
  2770. if (Mul->getNoWrapFlags(OrigFlags) != OrigFlags)
  2771. Mul->setNoWrapFlags(ComputeFlags(Ops));
  2772. return S;
  2773. }
  2774. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
  2775. if (Ops.size() == 2) {
  2776. // C1*(C2+V) -> C1*C2 + C1*V
  2777. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
  2778. // If any of Add's ops are Adds or Muls with a constant, apply this
  2779. // transformation as well.
  2780. //
  2781. // TODO: There are some cases where this transformation is not
  2782. // profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of
  2783. // this transformation should be narrowed down.
  2784. if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add))
  2785. return getAddExpr(getMulExpr(LHSC, Add->getOperand(0),
  2786. SCEV::FlagAnyWrap, Depth + 1),
  2787. getMulExpr(LHSC, Add->getOperand(1),
  2788. SCEV::FlagAnyWrap, Depth + 1),
  2789. SCEV::FlagAnyWrap, Depth + 1);
  2790. if (Ops[0]->isAllOnesValue()) {
  2791. // If we have a mul by -1 of an add, try distributing the -1 among the
  2792. // add operands.
  2793. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
  2794. SmallVector<const SCEV *, 4> NewOps;
  2795. bool AnyFolded = false;
  2796. for (const SCEV *AddOp : Add->operands()) {
  2797. const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap,
  2798. Depth + 1);
  2799. if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
  2800. NewOps.push_back(Mul);
  2801. }
  2802. if (AnyFolded)
  2803. return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1);
  2804. } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
  2805. // Negation preserves a recurrence's no self-wrap property.
  2806. SmallVector<const SCEV *, 4> Operands;
  2807. for (const SCEV *AddRecOp : AddRec->operands())
  2808. Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap,
  2809. Depth + 1));
  2810. return getAddRecExpr(Operands, AddRec->getLoop(),
  2811. AddRec->getNoWrapFlags(SCEV::FlagNW));
  2812. }
  2813. }
  2814. }
  2815. }
  2816. // Skip over the add expression until we get to a multiply.
  2817. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
  2818. ++Idx;
// If there are mul operands, inline them all into this expression.
  2820. if (Idx < Ops.size()) {
  2821. bool DeletedMul = false;
  2822. while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
  2823. if (Ops.size() > MulOpsInlineThreshold)
  2824. break;
// If we have a mul, expand the mul operands onto the end of the
  2826. // operands list.
  2827. Ops.erase(Ops.begin()+Idx);
  2828. Ops.append(Mul->op_begin(), Mul->op_end());
  2829. DeletedMul = true;
  2830. }
  2831. // If we deleted at least one mul, we added operands to the end of the
  2832. // list, and they are not necessarily sorted. Recurse to resort and
  2833. // resimplify any operands we just acquired.
  2834. if (DeletedMul)
  2835. return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
  2836. }
  2837. // If there are any add recurrences in the operands list, see if any other
  2838. // added values are loop invariant. If so, we can fold them into the
  2839. // recurrence.
  2840. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
  2841. ++Idx;
  2842. // Scan over all recurrences, trying to fold loop invariants into them.
  2843. for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
  2844. // Scan all of the other operands to this mul and add them to the vector
  2845. // if they are loop invariant w.r.t. the recurrence.
  2846. SmallVector<const SCEV *, 8> LIOps;
  2847. const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
  2848. const Loop *AddRecLoop = AddRec->getLoop();
  2849. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  2850. if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
  2851. LIOps.push_back(Ops[i]);
  2852. Ops.erase(Ops.begin()+i);
  2853. --i; --e;
  2854. }
  2855. // If we found some loop invariants, fold them into the recurrence.
  2856. if (!LIOps.empty()) {
  2857. // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
  2858. SmallVector<const SCEV *, 4> NewOps;
  2859. NewOps.reserve(AddRec->getNumOperands());
  2860. const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1);
  2861. for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
  2862. NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i),
  2863. SCEV::FlagAnyWrap, Depth + 1));
  2864. // Build the new addrec. Propagate the NUW and NSW flags if both the
  2865. // outer mul and the inner addrec are guaranteed to have no overflow.
  2866. //
// No-self-wrap cannot be guaranteed after changing the step size, but it
  2868. // will be inferred if either NUW or NSW is true.
  2869. SCEV::NoWrapFlags Flags = ComputeFlags({Scale, AddRec});
  2870. const SCEV *NewRec = getAddRecExpr(
  2871. NewOps, AddRecLoop, AddRec->getNoWrapFlags(Flags));
  2872. // If all of the other operands were loop invariant, we are done.
  2873. if (Ops.size() == 1) return NewRec;
  2874. // Otherwise, multiply the folded AddRec by the non-invariant parts.
  2875. for (unsigned i = 0;; ++i)
  2876. if (Ops[i] == AddRec) {
  2877. Ops[i] = NewRec;
  2878. break;
  2879. }
  2880. return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
  2881. }
  2882. // Okay, if there weren't any loop invariants to be folded, check to see
// if there are multiple AddRecs with the same loop induction variable
  2884. // being multiplied together. If so, we can fold them.
  2885. // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
  2886. // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
  2887. // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
  2888. // ]]],+,...up to x=2n}.
  2889. // Note that the arguments to choose() are always integers with values
  2890. // known at compile time, never SCEV objects.
  2891. //
  2892. // The implementation avoids pointless extra computations when the two
// addrecs are of different length (mathematically, it's equivalent to
  2894. // an infinite stream of zeros on the right).
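// For the simplest case of two affine addrecs this reduces to
//   {a,+,b}<L> * {c,+,d}<L> = {a*c,+,a*d+b*c+b*d,+,2*b*d}<L>,
// because (a+b*i)*(c+d*i) = a*c + (a*d+b*c)*i + b*d*i^2 and i^2 = i + 2*C(i,2).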
  2895. bool OpsModified = false;
  2896. for (unsigned OtherIdx = Idx+1;
  2897. OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
  2898. ++OtherIdx) {
  2899. const SCEVAddRecExpr *OtherAddRec =
  2900. dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
  2901. if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
  2902. continue;
  2903. // Limit max number of arguments to avoid creation of unreasonably big
  2904. // SCEVAddRecs with very complex operands.
  2905. if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 >
  2906. MaxAddRecSize || hasHugeExpression({AddRec, OtherAddRec}))
  2907. continue;
  2908. bool Overflow = false;
  2909. Type *Ty = AddRec->getType();
  2910. bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
  2911. SmallVector<const SCEV*, 7> AddRecOps;
  2912. for (int x = 0, xe = AddRec->getNumOperands() +
  2913. OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
  2914. SmallVector <const SCEV *, 7> SumOps;
  2915. for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
  2916. uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
  2917. for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
  2918. ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
  2919. z < ze && !Overflow; ++z) {
  2920. uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
  2921. uint64_t Coeff;
  2922. if (LargerThan64Bits)
  2923. Coeff = umul_ov(Coeff1, Coeff2, Overflow);
  2924. else
  2925. Coeff = Coeff1*Coeff2;
  2926. const SCEV *CoeffTerm = getConstant(Ty, Coeff);
  2927. const SCEV *Term1 = AddRec->getOperand(y-z);
  2928. const SCEV *Term2 = OtherAddRec->getOperand(z);
  2929. SumOps.push_back(getMulExpr(CoeffTerm, Term1, Term2,
  2930. SCEV::FlagAnyWrap, Depth + 1));
  2931. }
  2932. }
  2933. if (SumOps.empty())
  2934. SumOps.push_back(getZero(Ty));
  2935. AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
  2936. }
  2937. if (!Overflow) {
  2938. const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop,
  2939. SCEV::FlagAnyWrap);
  2940. if (Ops.size() == 2) return NewAddRec;
  2941. Ops[Idx] = NewAddRec;
  2942. Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
  2943. OpsModified = true;
  2944. AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
  2945. if (!AddRec)
  2946. break;
  2947. }
  2948. }
  2949. if (OpsModified)
  2950. return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
// Otherwise couldn't fold anything into this recurrence. Move on to the
  2952. // next one.
  2953. }
// Okay, it looks like we really DO need a mul expr. Check to see if we
  2955. // already have one, otherwise create a new one.
  2956. return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
  2957. }
  2958. /// Represents an unsigned remainder expression based on unsigned division.
  2959. const SCEV *ScalarEvolution::getURemExpr(const SCEV *LHS,
  2960. const SCEV *RHS) {
  2961. assert(getEffectiveSCEVType(LHS->getType()) ==
  2962. getEffectiveSCEVType(RHS->getType()) &&
  2963. "SCEVURemExpr operand types don't match!");
  2964. // Short-circuit easy cases
  2965. if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
  2966. // If constant is one, the result is trivial
  2967. if (RHSC->getValue()->isOne())
  2968. return getZero(LHS->getType()); // X urem 1 --> 0
  2969. // If constant is a power of two, fold into a zext(trunc(LHS)).
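// For example, X urem 8 only keeps the low 3 bits of X, so it is equivalent
// to zext(trunc(X) to i3) back to the original type.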
  2970. if (RHSC->getAPInt().isPowerOf2()) {
  2971. Type *FullTy = LHS->getType();
  2972. Type *TruncTy =
  2973. IntegerType::get(getContext(), RHSC->getAPInt().logBase2());
  2974. return getZeroExtendExpr(getTruncateExpr(LHS, TruncTy), FullTy);
  2975. }
  2976. }
// Fall back to the identity %x urem %y == %x -<nuw> ((%x udiv %y) *<nuw> %y)
  2978. const SCEV *UDiv = getUDivExpr(LHS, RHS);
  2979. const SCEV *Mult = getMulExpr(UDiv, RHS, SCEV::FlagNUW);
  2980. return getMinusSCEV(LHS, Mult, SCEV::FlagNUW);
  2981. }
  2982. /// Get a canonical unsigned division expression, or something simpler if
  2983. /// possible.
  2984. const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
  2985. const SCEV *RHS) {
  2986. assert(!LHS->getType()->isPointerTy() &&
  2987. "SCEVUDivExpr operand can't be pointer!");
  2988. assert(LHS->getType() == RHS->getType() &&
  2989. "SCEVUDivExpr operand types don't match!");
  2990. FoldingSetNodeID ID;
  2991. ID.AddInteger(scUDivExpr);
  2992. ID.AddPointer(LHS);
  2993. ID.AddPointer(RHS);
  2994. void *IP = nullptr;
  2995. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
  2996. return S;
  2997. // 0 udiv Y == 0
  2998. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS))
  2999. if (LHSC->getValue()->isZero())
  3000. return LHS;
  3001. if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
  3002. if (RHSC->getValue()->isOne())
  3003. return LHS; // X udiv 1 --> x
  3004. // If the denominator is zero, the result of the udiv is undefined. Don't
  3005. // try to analyze it, because the resolution chosen here may differ from
  3006. // the resolution chosen in other parts of the compiler.
  3007. if (!RHSC->getValue()->isZero()) {
// Determine if the division can be folded into the operands of
// the dividend (LHS).
  3010. // TODO: Generalize this to non-constants by using known-bits information.
  3011. Type *Ty = LHS->getType();
  3012. unsigned LZ = RHSC->getAPInt().countLeadingZeros();
  3013. unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
  3014. // For non-power-of-two values, effectively round the value up to the
  3015. // nearest power of two.
  3016. if (!RHSC->getAPInt().isPowerOf2())
  3017. ++MaxShiftAmt;
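// For example, dividing an i32 value by 6: countLeadingZeros(6) == 29, so
// MaxShiftAmt becomes 2 + 1 == 3 (6 is effectively rounded up to 8) and
// ExtTy is i35.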
  3018. IntegerType *ExtTy =
  3019. IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
  3020. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
  3021. if (const SCEVConstant *Step =
  3022. dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
  3023. // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
  3024. const APInt &StepInt = Step->getAPInt();
  3025. const APInt &DivInt = RHSC->getAPInt();
  3026. if (!StepInt.urem(DivInt) &&
  3027. getZeroExtendExpr(AR, ExtTy) ==
  3028. getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
  3029. getZeroExtendExpr(Step, ExtTy),
  3030. AR->getLoop(), SCEV::FlagAnyWrap)) {
  3031. SmallVector<const SCEV *, 4> Operands;
  3032. for (const SCEV *Op : AR->operands())
  3033. Operands.push_back(getUDivExpr(Op, RHS));
  3034. return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW);
  3035. }
  3036. /// Get a canonical UDivExpr for a recurrence.
  3037. /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
  3038. // We can currently only fold X%N if X is constant.
  3039. const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
  3040. if (StartC && !DivInt.urem(StepInt) &&
  3041. getZeroExtendExpr(AR, ExtTy) ==
  3042. getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
  3043. getZeroExtendExpr(Step, ExtTy),
  3044. AR->getLoop(), SCEV::FlagAnyWrap)) {
  3045. const APInt &StartInt = StartC->getAPInt();
  3046. const APInt &StartRem = StartInt.urem(StepInt);
  3047. if (StartRem != 0) {
  3048. const SCEV *NewLHS =
  3049. getAddRecExpr(getConstant(StartInt - StartRem), Step,
  3050. AR->getLoop(), SCEV::FlagNW);
  3051. if (LHS != NewLHS) {
  3052. LHS = NewLHS;
  3053. // Reset the ID to include the new LHS, and check if it is
  3054. // already cached.
  3055. ID.clear();
  3056. ID.AddInteger(scUDivExpr);
  3057. ID.AddPointer(LHS);
  3058. ID.AddPointer(RHS);
  3059. IP = nullptr;
  3060. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
  3061. return S;
  3062. }
  3063. }
  3064. }
  3065. }
  3066. // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
  3067. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
  3068. SmallVector<const SCEV *, 4> Operands;
  3069. for (const SCEV *Op : M->operands())
  3070. Operands.push_back(getZeroExtendExpr(Op, ExtTy));
  3071. if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
  3072. // Find an operand that's safely divisible.
  3073. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
  3074. const SCEV *Op = M->getOperand(i);
  3075. const SCEV *Div = getUDivExpr(Op, RHSC);
  3076. if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
  3077. Operands = SmallVector<const SCEV *, 4>(M->operands());
  3078. Operands[i] = Div;
  3079. return getMulExpr(Operands);
  3080. }
  3081. }
  3082. }
  3083. // (A/B)/C --> A/(B*C) if safe and B*C can be folded.
  3084. if (const SCEVUDivExpr *OtherDiv = dyn_cast<SCEVUDivExpr>(LHS)) {
  3085. if (auto *DivisorConstant =
  3086. dyn_cast<SCEVConstant>(OtherDiv->getRHS())) {
  3087. bool Overflow = false;
  3088. APInt NewRHS =
  3089. DivisorConstant->getAPInt().umul_ov(RHSC->getAPInt(), Overflow);
  3090. if (Overflow) {
  3091. return getConstant(RHSC->getType(), 0, false);
  3092. }
  3093. return getUDivExpr(OtherDiv->getLHS(), getConstant(NewRHS));
  3094. }
  3095. }
  3096. // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
  3097. if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
  3098. SmallVector<const SCEV *, 4> Operands;
  3099. for (const SCEV *Op : A->operands())
  3100. Operands.push_back(getZeroExtendExpr(Op, ExtTy));
  3101. if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
  3102. Operands.clear();
  3103. for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
  3104. const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
  3105. if (isa<SCEVUDivExpr>(Op) ||
  3106. getMulExpr(Op, RHS) != A->getOperand(i))
  3107. break;
  3108. Operands.push_back(Op);
  3109. }
  3110. if (Operands.size() == A->getNumOperands())
  3111. return getAddExpr(Operands);
  3112. }
  3113. }
  3114. // Fold if both operands are constant.
  3115. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
  3116. Constant *LHSCV = LHSC->getValue();
  3117. Constant *RHSCV = RHSC->getValue();
  3118. return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
  3119. RHSCV)));
  3120. }
  3121. }
  3122. }
  3123. // The Insertion Point (IP) might be invalid by now (due to UniqueSCEVs
  3124. // changes). Make sure we get a new one.
  3125. IP = nullptr;
  3126. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  3127. SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
  3128. LHS, RHS);
  3129. UniqueSCEVs.InsertNode(S, IP);
  3130. registerUser(S, {LHS, RHS});
  3131. return S;
  3132. }
  3133. APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
  3134. APInt A = C1->getAPInt().abs();
  3135. APInt B = C2->getAPInt().abs();
  3136. uint32_t ABW = A.getBitWidth();
  3137. uint32_t BBW = B.getBitWidth();
  3138. if (ABW > BBW)
  3139. B = B.zext(ABW);
  3140. else if (ABW < BBW)
  3141. A = A.zext(BBW);
  3142. return APIntOps::GreatestCommonDivisor(std::move(A), std::move(B));
  3143. }
  3144. /// Get a canonical unsigned division expression, or something simpler if
  3145. /// possible. There is no representation for an exact udiv in SCEV IR, but we
  3146. /// can attempt to remove factors from the LHS and RHS. We can't do this when
  3147. /// it's not exact because the udiv may be clearing bits.
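/// For example, an exact (6 * X)<nuw> /u 2 can be rewritten as 3 * X, whereas
/// a plain udiv may discard low bits (e.g. 7 /u 2 == 3), so cancelling a
/// common factor there could change the value.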
  3148. const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
  3149. const SCEV *RHS) {
  3150. // TODO: we could try to find factors in all sorts of things, but for now we
  3151. // just deal with u/exact (multiply, constant). See SCEVDivision towards the
  3152. // end of this file for inspiration.
  3153. const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
  3154. if (!Mul || !Mul->hasNoUnsignedWrap())
  3155. return getUDivExpr(LHS, RHS);
  3156. if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
  3157. // If the mulexpr multiplies by a constant, then that constant must be the
  3158. // first element of the mulexpr.
  3159. if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
  3160. if (LHSCst == RHSCst) {
  3161. SmallVector<const SCEV *, 2> Operands(drop_begin(Mul->operands()));
  3162. return getMulExpr(Operands);
  3163. }
// We can't just assume that LHSCst divides RHSCst cleanly; it could be
  3165. // that there's a factor provided by one of the other terms. We need to
  3166. // check.
  3167. APInt Factor = gcd(LHSCst, RHSCst);
  3168. if (!Factor.isIntN(1)) {
  3169. LHSCst =
  3170. cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor)));
  3171. RHSCst =
  3172. cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));
  3173. SmallVector<const SCEV *, 2> Operands;
  3174. Operands.push_back(LHSCst);
  3175. Operands.append(Mul->op_begin() + 1, Mul->op_end());
  3176. LHS = getMulExpr(Operands);
  3177. RHS = RHSCst;
  3178. Mul = dyn_cast<SCEVMulExpr>(LHS);
  3179. if (!Mul)
  3180. return getUDivExactExpr(LHS, RHS);
  3181. }
  3182. }
  3183. }
  3184. for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
  3185. if (Mul->getOperand(i) == RHS) {
  3186. SmallVector<const SCEV *, 2> Operands;
  3187. Operands.append(Mul->op_begin(), Mul->op_begin() + i);
  3188. Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
  3189. return getMulExpr(Operands);
  3190. }
  3191. }
  3192. return getUDivExpr(LHS, RHS);
  3193. }
  3194. /// Get an add recurrence expression for the specified loop. Simplify the
  3195. /// expression as much as possible.
  3196. const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
  3197. const Loop *L,
  3198. SCEV::NoWrapFlags Flags) {
  3199. SmallVector<const SCEV *, 4> Operands;
  3200. Operands.push_back(Start);
  3201. if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
  3202. if (StepChrec->getLoop() == L) {
  3203. Operands.append(StepChrec->op_begin(), StepChrec->op_end());
  3204. return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
  3205. }
  3206. Operands.push_back(Step);
  3207. return getAddRecExpr(Operands, L, Flags);
  3208. }
  3209. /// Get an add recurrence expression for the specified loop. Simplify the
  3210. /// expression as much as possible.
  3211. const SCEV *
  3212. ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
  3213. const Loop *L, SCEV::NoWrapFlags Flags) {
  3214. if (Operands.size() == 1) return Operands[0];
  3215. #ifndef NDEBUG
  3216. Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
  3217. for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
  3218. assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
  3219. "SCEVAddRecExpr operand types don't match!");
  3220. assert(!Operands[i]->getType()->isPointerTy() && "Step must be integer");
  3221. }
  3222. for (unsigned i = 0, e = Operands.size(); i != e; ++i)
  3223. assert(isLoopInvariant(Operands[i], L) &&
  3224. "SCEVAddRecExpr operand is not loop-invariant!");
  3225. #endif
  3226. if (Operands.back()->isZero()) {
  3227. Operands.pop_back();
  3228. return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
  3229. }
// It's tempting to call getConstantMaxBackedgeTakenCount here and
  3231. // use that information to infer NUW and NSW flags. However, computing a
  3232. // BE count requires calling getAddRecExpr, so we may not yet have a
  3233. // meaningful BE count at this point (and if we don't, we'd be stuck
  3234. // with a SCEVCouldNotCompute as the cached BE count).
  3235. Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
// Canonicalize nested AddRecs by nesting them in order of loop depth.
  3237. if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
  3238. const Loop *NestedLoop = NestedAR->getLoop();
  3239. if (L->contains(NestedLoop)
  3240. ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
  3241. : (!NestedLoop->contains(L) &&
  3242. DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
  3243. SmallVector<const SCEV *, 4> NestedOperands(NestedAR->operands());
  3244. Operands[0] = NestedAR->getStart();
  3245. // AddRecs require their operands be loop-invariant with respect to their
  3246. // loops. Don't perform this transformation if it would break this
  3247. // requirement.
  3248. bool AllInvariant = all_of(
  3249. Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });
  3250. if (AllInvariant) {
  3251. // Create a recurrence for the outer loop with the same step size.
  3252. //
  3253. // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
  3254. // inner recurrence has the same property.
  3255. SCEV::NoWrapFlags OuterFlags =
  3256. maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
  3257. NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
  3258. AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
  3259. return isLoopInvariant(Op, NestedLoop);
  3260. });
  3261. if (AllInvariant) {
  3262. // Ok, both add recurrences are valid after the transformation.
  3263. //
  3264. // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
  3265. // the outer recurrence has the same property.
  3266. SCEV::NoWrapFlags InnerFlags =
  3267. maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
  3268. return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
  3269. }
  3270. }
  3271. // Reset Operands to its original state.
  3272. Operands[0] = NestedAR;
  3273. }
  3274. }
  3275. // Okay, it looks like we really DO need an addrec expr. Check to see if we
  3276. // already have one, otherwise create a new one.
  3277. return getOrCreateAddRecExpr(Operands, L, Flags);
  3278. }
  3279. const SCEV *
  3280. ScalarEvolution::getGEPExpr(GEPOperator *GEP,
  3281. const SmallVectorImpl<const SCEV *> &IndexExprs) {
  3282. const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand());
  3283. // getSCEV(Base)->getType() has the same address space as Base->getType()
  3284. // because SCEV::getType() preserves the address space.
  3285. Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType());
  3286. const bool AssumeInBoundsFlags = [&]() {
  3287. if (!GEP->isInBounds())
  3288. return false;
  3289. // We'd like to propagate flags from the IR to the corresponding SCEV nodes,
  3290. // but to do that, we have to ensure that said flag is valid in the entire
  3291. // defined scope of the SCEV.
  3292. auto *GEPI = dyn_cast<Instruction>(GEP);
  3293. // TODO: non-instructions have global scope. We might be able to prove
// some global scope cases.
  3295. return GEPI && isSCEVExprNeverPoison(GEPI);
  3296. }();
  3297. SCEV::NoWrapFlags OffsetWrap =
  3298. AssumeInBoundsFlags ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
  3299. Type *CurTy = GEP->getType();
  3300. bool FirstIter = true;
  3301. SmallVector<const SCEV *, 4> Offsets;
  3302. for (const SCEV *IndexExpr : IndexExprs) {
  3303. // Compute the (potentially symbolic) offset in bytes for this index.
  3304. if (StructType *STy = dyn_cast<StructType>(CurTy)) {
  3305. // For a struct, add the member offset.
  3306. ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
  3307. unsigned FieldNo = Index->getZExtValue();
  3308. const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo);
  3309. Offsets.push_back(FieldOffset);
  3310. // Update CurTy to the type of the field at Index.
  3311. CurTy = STy->getTypeAtIndex(Index);
  3312. } else {
  3313. // Update CurTy to its element type.
  3314. if (FirstIter) {
  3315. assert(isa<PointerType>(CurTy) &&
  3316. "The first index of a GEP indexes a pointer");
  3317. CurTy = GEP->getSourceElementType();
  3318. FirstIter = false;
  3319. } else {
  3320. CurTy = GetElementPtrInst::getTypeAtIndex(CurTy, (uint64_t)0);
  3321. }
  3322. // For an array, add the element offset, explicitly scaled.
  3323. const SCEV *ElementSize = getSizeOfExpr(IntIdxTy, CurTy);
  3324. // Getelementptr indices are signed.
  3325. IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy);
  3326. // Multiply the index by the element size to compute the element offset.
  3327. const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, OffsetWrap);
  3328. Offsets.push_back(LocalOffset);
  3329. }
  3330. }
  3331. // Handle degenerate case of GEP without offsets.
  3332. if (Offsets.empty())
  3333. return BaseExpr;
  3334. // Add the offsets together, assuming nsw if inbounds.
  3335. const SCEV *Offset = getAddExpr(Offsets, OffsetWrap);
  3336. // Add the base address and the offset. We cannot use the nsw flag, as the
  3337. // base address is unsigned. However, if we know that the offset is
  3338. // non-negative, we can use nuw.
  3339. SCEV::NoWrapFlags BaseWrap = AssumeInBoundsFlags && isKnownNonNegative(Offset)
  3340. ? SCEV::FlagNUW : SCEV::FlagAnyWrap;
  3341. auto *GEPExpr = getAddExpr(BaseExpr, Offset, BaseWrap);
  3342. assert(BaseExpr->getType() == GEPExpr->getType() &&
  3343. "GEP should not change type mid-flight.");
  3344. return GEPExpr;
  3345. }
  3346. SCEV *ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType,
  3347. ArrayRef<const SCEV *> Ops) {
  3348. FoldingSetNodeID ID;
  3349. ID.AddInteger(SCEVType);
  3350. for (const SCEV *Op : Ops)
  3351. ID.AddPointer(Op);
  3352. void *IP = nullptr;
  3353. return UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
  3354. }
  3355. const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) {
  3356. SCEV::NoWrapFlags Flags = IsNSW ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
  3357. return getSMaxExpr(Op, getNegativeSCEV(Op, Flags));
  3358. }
  3359. const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
  3360. SmallVectorImpl<const SCEV *> &Ops) {
  3361. assert(SCEVMinMaxExpr::isMinMaxType(Kind) && "Not a SCEVMinMaxExpr!");
  3362. assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
  3363. if (Ops.size() == 1) return Ops[0];
  3364. #ifndef NDEBUG
  3365. Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  3366. for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
  3367. assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
  3368. "Operand types don't match!");
  3369. assert(Ops[0]->getType()->isPointerTy() ==
  3370. Ops[i]->getType()->isPointerTy() &&
  3371. "min/max should be consistently pointerish");
  3372. }
  3373. #endif
  3374. bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr;
  3375. bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr;
// Sort by complexity; this groups all similar expression types together.
  3377. GroupByComplexity(Ops, &LI, DT);
  3378. // Check if we have created the same expression before.
  3379. if (const SCEV *S = findExistingSCEVInCache(Kind, Ops)) {
  3380. return S;
  3381. }
  3382. // If there are any constants, fold them together.
  3383. unsigned Idx = 0;
  3384. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
  3385. ++Idx;
  3386. assert(Idx < Ops.size());
  3387. auto FoldOp = [&](const APInt &LHS, const APInt &RHS) {
  3388. if (Kind == scSMaxExpr)
  3389. return APIntOps::smax(LHS, RHS);
  3390. else if (Kind == scSMinExpr)
  3391. return APIntOps::smin(LHS, RHS);
  3392. else if (Kind == scUMaxExpr)
  3393. return APIntOps::umax(LHS, RHS);
  3394. else if (Kind == scUMinExpr)
  3395. return APIntOps::umin(LHS, RHS);
  3396. llvm_unreachable("Unknown SCEV min/max opcode");
  3397. };
  3398. while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
  3399. // We found two constants, fold them together!
  3400. ConstantInt *Fold = ConstantInt::get(
  3401. getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt()));
  3402. Ops[0] = getConstant(Fold);
  3403. Ops.erase(Ops.begin()+1); // Erase the folded element
  3404. if (Ops.size() == 1) return Ops[0];
  3405. LHSC = cast<SCEVConstant>(Ops[0]);
  3406. }
  3407. bool IsMinV = LHSC->getValue()->isMinValue(IsSigned);
  3408. bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned);
  3409. if (IsMax ? IsMinV : IsMaxV) {
  3410. // If we are left with a constant minimum(/maximum)-int, strip it off.
  3411. Ops.erase(Ops.begin());
  3412. --Idx;
  3413. } else if (IsMax ? IsMaxV : IsMinV) {
  3414. // If we have a max(/min) with a constant maximum(/minimum)-int,
  3415. // it will always be the extremum.
  3416. return LHSC;
  3417. }
  3418. if (Ops.size() == 1) return Ops[0];
  3419. }
  3420. // Find the first operation of the same kind
  3421. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind)
  3422. ++Idx;
  3423. // Check to see if one of the operands is of the same kind. If so, expand its
  3424. // operands onto our operand list, and recurse to simplify.
  3425. if (Idx < Ops.size()) {
  3426. bool DeletedAny = false;
  3427. while (Ops[Idx]->getSCEVType() == Kind) {
  3428. const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]);
  3429. Ops.erase(Ops.begin()+Idx);
  3430. Ops.append(SMME->op_begin(), SMME->op_end());
  3431. DeletedAny = true;
  3432. }
  3433. if (DeletedAny)
  3434. return getMinMaxExpr(Kind, Ops);
  3435. }
  3436. // Okay, check to see if the same value occurs in the operand list twice. If
  3437. // so, delete one. Since we sorted the list, these values are required to
  3438. // be adjacent.
  3439. llvm::CmpInst::Predicate GEPred =
  3440. IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
  3441. llvm::CmpInst::Predicate LEPred =
  3442. IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
  3443. llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred;
  3444. llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred;
  3445. for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) {
  3446. if (Ops[i] == Ops[i + 1] ||
  3447. isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) {
  3448. // X op Y op Y --> X op Y
  3449. // X op Y --> X, if we know X, Y are ordered appropriately
  3450. Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
  3451. --i;
  3452. --e;
  3453. } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i],
  3454. Ops[i + 1])) {
  3455. // X op Y --> Y, if we know X, Y are ordered appropriately
  3456. Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
  3457. --i;
  3458. --e;
  3459. }
  3460. }
  3461. if (Ops.size() == 1) return Ops[0];
  3462. assert(!Ops.empty() && "Reduced smax down to nothing!");
  3463. // Okay, it looks like we really DO need an expr. Check to see if we
  3464. // already have one, otherwise create a new one.
  3465. FoldingSetNodeID ID;
  3466. ID.AddInteger(Kind);
  3467. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  3468. ID.AddPointer(Ops[i]);
  3469. void *IP = nullptr;
  3470. const SCEV *ExistingSCEV = UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
  3471. if (ExistingSCEV)
  3472. return ExistingSCEV;
  3473. const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  3474. std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  3475. SCEV *S = new (SCEVAllocator)
  3476. SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
  3477. UniqueSCEVs.InsertNode(S, IP);
  3478. registerUser(S, Ops);
  3479. return S;
  3480. }
  3481. namespace {
  3482. class SCEVSequentialMinMaxDeduplicatingVisitor final
  3483. : public SCEVVisitor<SCEVSequentialMinMaxDeduplicatingVisitor,
  3484. Optional<const SCEV *>> {
  3485. using RetVal = Optional<const SCEV *>;
  3486. using Base = SCEVVisitor<SCEVSequentialMinMaxDeduplicatingVisitor, RetVal>;
  3487. ScalarEvolution &SE;
  3488. const SCEVTypes RootKind; // Must be a sequential min/max expression.
  3489. const SCEVTypes NonSequentialRootKind; // Non-sequential variant of RootKind.
  3490. SmallPtrSet<const SCEV *, 16> SeenOps;
  3491. bool canRecurseInto(SCEVTypes Kind) const {
  3492. // We can only recurse into the SCEV expression of the same effective type
  3493. // as the type of our root SCEV expression.
  3494. return RootKind == Kind || NonSequentialRootKind == Kind;
  3495. };
  3496. RetVal visitAnyMinMaxExpr(const SCEV *S) {
  3497. assert((isa<SCEVMinMaxExpr>(S) || isa<SCEVSequentialMinMaxExpr>(S)) &&
  3498. "Only for min/max expressions.");
  3499. SCEVTypes Kind = S->getSCEVType();
  3500. if (!canRecurseInto(Kind))
  3501. return S;
  3502. auto *NAry = cast<SCEVNAryExpr>(S);
  3503. SmallVector<const SCEV *> NewOps;
  3504. bool Changed =
  3505. visit(Kind, makeArrayRef(NAry->op_begin(), NAry->op_end()), NewOps);
  3506. if (!Changed)
  3507. return S;
  3508. if (NewOps.empty())
  3509. return None;
  3510. return isa<SCEVSequentialMinMaxExpr>(S)
  3511. ? SE.getSequentialMinMaxExpr(Kind, NewOps)
  3512. : SE.getMinMaxExpr(Kind, NewOps);
  3513. }
  3514. RetVal visit(const SCEV *S) {
  3515. // Has the whole operand been seen already?
  3516. if (!SeenOps.insert(S).second)
  3517. return None;
  3518. return Base::visit(S);
  3519. }
  3520. public:
  3521. SCEVSequentialMinMaxDeduplicatingVisitor(ScalarEvolution &SE,
  3522. SCEVTypes RootKind)
  3523. : SE(SE), RootKind(RootKind),
  3524. NonSequentialRootKind(
  3525. SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(
  3526. RootKind)) {}
  3527. bool /*Changed*/ visit(SCEVTypes Kind, ArrayRef<const SCEV *> OrigOps,
  3528. SmallVectorImpl<const SCEV *> &NewOps) {
  3529. bool Changed = false;
  3530. SmallVector<const SCEV *> Ops;
  3531. Ops.reserve(OrigOps.size());
  3532. for (const SCEV *Op : OrigOps) {
  3533. RetVal NewOp = visit(Op);
  3534. if (NewOp != Op)
  3535. Changed = true;
  3536. if (NewOp)
  3537. Ops.emplace_back(*NewOp);
  3538. }
  3539. if (Changed)
  3540. NewOps = std::move(Ops);
  3541. return Changed;
  3542. }
  3543. RetVal visitConstant(const SCEVConstant *Constant) { return Constant; }
  3544. RetVal visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) { return Expr; }
  3545. RetVal visitTruncateExpr(const SCEVTruncateExpr *Expr) { return Expr; }
  3546. RetVal visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { return Expr; }
  3547. RetVal visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { return Expr; }
  3548. RetVal visitAddExpr(const SCEVAddExpr *Expr) { return Expr; }
  3549. RetVal visitMulExpr(const SCEVMulExpr *Expr) { return Expr; }
  3550. RetVal visitUDivExpr(const SCEVUDivExpr *Expr) { return Expr; }
  3551. RetVal visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; }
  3552. RetVal visitSMaxExpr(const SCEVSMaxExpr *Expr) {
  3553. return visitAnyMinMaxExpr(Expr);
  3554. }
  3555. RetVal visitUMaxExpr(const SCEVUMaxExpr *Expr) {
  3556. return visitAnyMinMaxExpr(Expr);
  3557. }
  3558. RetVal visitSMinExpr(const SCEVSMinExpr *Expr) {
  3559. return visitAnyMinMaxExpr(Expr);
  3560. }
  3561. RetVal visitUMinExpr(const SCEVUMinExpr *Expr) {
  3562. return visitAnyMinMaxExpr(Expr);
  3563. }
  3564. RetVal visitSequentialUMinExpr(const SCEVSequentialUMinExpr *Expr) {
  3565. return visitAnyMinMaxExpr(Expr);
  3566. }
  3567. RetVal visitUnknown(const SCEVUnknown *Expr) { return Expr; }
  3568. RetVal visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { return Expr; }
  3569. };
  3570. } // namespace
  3571. const SCEV *
  3572. ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
  3573. SmallVectorImpl<const SCEV *> &Ops) {
  3574. assert(SCEVSequentialMinMaxExpr::isSequentialMinMaxType(Kind) &&
  3575. "Not a SCEVSequentialMinMaxExpr!");
  3576. assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
  3577. if (Ops.size() == 1)
  3578. return Ops[0];
  3579. if (Ops.size() == 2 &&
  3580. any_of(Ops, [](const SCEV *Op) { return isa<SCEVConstant>(Op); }))
  3581. return getMinMaxExpr(
  3582. SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(Kind),
  3583. Ops);
  3584. #ifndef NDEBUG
  3585. Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
  3586. for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
  3587. assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
  3588. "Operand types don't match!");
  3589. assert(Ops[0]->getType()->isPointerTy() ==
  3590. Ops[i]->getType()->isPointerTy() &&
  3591. "min/max should be consistently pointerish");
  3592. }
  3593. #endif
  3594. // Note that SCEVSequentialMinMaxExpr is *NOT* commutative,
  3595. // so we can *NOT* do any kind of sorting of the expressions!
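// For example, (0 umin_seq poison) is 0, but reordering the operands to
// (poison umin_seq 0) would yield poison.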
  3596. // Check if we have created the same expression before.
  3597. if (const SCEV *S = findExistingSCEVInCache(Kind, Ops))
  3598. return S;
  3599. // FIXME: there are *some* simplifications that we can do here.
  3600. // Keep only the first instance of an operand.
  3601. {
  3602. SCEVSequentialMinMaxDeduplicatingVisitor Deduplicator(*this, Kind);
  3603. bool Changed = Deduplicator.visit(Kind, Ops, Ops);
  3604. if (Changed)
  3605. return getSequentialMinMaxExpr(Kind, Ops);
  3606. }
  3607. // Check to see if one of the operands is of the same kind. If so, expand its
  3608. // operands onto our operand list, and recurse to simplify.
  3609. {
  3610. unsigned Idx = 0;
  3611. bool DeletedAny = false;
  3612. while (Idx < Ops.size()) {
  3613. if (Ops[Idx]->getSCEVType() != Kind) {
  3614. ++Idx;
  3615. continue;
  3616. }
  3617. const auto *SMME = cast<SCEVSequentialMinMaxExpr>(Ops[Idx]);
  3618. Ops.erase(Ops.begin() + Idx);
  3619. Ops.insert(Ops.begin() + Idx, SMME->op_begin(), SMME->op_end());
  3620. DeletedAny = true;
  3621. }
  3622. if (DeletedAny)
  3623. return getSequentialMinMaxExpr(Kind, Ops);
  3624. }
  3625. // Okay, it looks like we really DO need an expr. Check to see if we
  3626. // already have one, otherwise create a new one.
  3627. FoldingSetNodeID ID;
  3628. ID.AddInteger(Kind);
  3629. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  3630. ID.AddPointer(Ops[i]);
  3631. void *IP = nullptr;
  3632. const SCEV *ExistingSCEV = UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
  3633. if (ExistingSCEV)
  3634. return ExistingSCEV;
  3635. const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
  3636. std::uninitialized_copy(Ops.begin(), Ops.end(), O);
  3637. SCEV *S = new (SCEVAllocator)
  3638. SCEVSequentialMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
  3639. UniqueSCEVs.InsertNode(S, IP);
  3640. registerUser(S, Ops);
  3641. return S;
  3642. }
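
// Worked example (informal): the simplifications above are deliberately
// order-preserving because the sequential form is not commutative. For SCEVs
// %a and %b of the same type one would expect, schematically:
//
//   umin_seq(%a, umin_seq(%b, %a))  -->  umin_seq(%a, %b, %a)   (flattening)
//                                   -->  umin_seq(%a, %b)       (keep only the
//                                                                first copy)
//   umin_seq(%a, 5)                 -->  (5 umin %a)            (two operands,
//                                                                one constant)
//
// but umin_seq(%a, %b) is never reordered into umin_seq(%b, %a).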
const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) {
  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
  return getSMaxExpr(Ops);
}

const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  return getMinMaxExpr(scSMaxExpr, Ops);
}

const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) {
  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
  return getUMaxExpr(Ops);
}

const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  return getMinMaxExpr(scUMaxExpr, Ops);
}

const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
                                         const SCEV *RHS) {
  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
  return getSMinExpr(Ops);
}

const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
  return getMinMaxExpr(scSMinExpr, Ops);
}

const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, const SCEV *RHS,
                                         bool Sequential) {
  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
  return getUMinExpr(Ops, Sequential);
}

const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops,
                                         bool Sequential) {
  return Sequential ? getSequentialMinMaxExpr(scSequentialUMinExpr, Ops)
                    : getMinMaxExpr(scUMinExpr, Ops);
}
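
// Informal usage sketch (hypothetical client code, not taken from this file):
// given a ScalarEvolution &SE and two same-typed SCEVs A and B, a caller can
// request either flavour of unsigned minimum:
//
//   const SCEV *NonSeq = SE.getUMinExpr(A, B);                      // umin
//   const SCEV *Seq    = SE.getUMinExpr(A, B, /*Sequential=*/true); // umin_seq
//
// The sequential form is order-sensitive: informally, once an earlier operand
// already determines the result, later operands no longer contribute, which is
// why it is routed through getSequentialMinMaxExpr rather than the commutative
// getMinMaxExpr.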
const SCEV *
ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy,
                                             ScalableVectorType *ScalableTy) {
  Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo());
  Constant *One = ConstantInt::get(IntTy, 1);
  Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One);
  // Note that the expression we created is the final expression, we don't
  // want to simplify it any further. Also, if we call a normal getSCEV(),
  // we'll end up in an endless recursion. So just create an SCEVUnknown.
  return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy));
}
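
// Worked example (informal): for a scalable type such as <vscale x 4 x i32>
// and IntTy == i64, the code above conceptually builds the classic
// "sizeof via GEP" constant expression
//
//   ptrtoint (<vscale x 4 x i32>* getelementptr (<vscale x 4 x i32>,
//             <vscale x 4 x i32>* null, i64 1) to i64)
//
// wrapped in a SCEVUnknown, since the size is a multiple of vscale and cannot
// be folded to a compile-time constant.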
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
  if (auto *ScalableAllocTy = dyn_cast<ScalableVectorType>(AllocTy))
    return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy);
  // We can bypass creating a target-independent constant expression and then
  // folding it back into a ConstantInt. This is just a compile-time
  // optimization.
  return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
}

const SCEV *ScalarEvolution::getStoreSizeOfExpr(Type *IntTy, Type *StoreTy) {
  if (auto *ScalableStoreTy = dyn_cast<ScalableVectorType>(StoreTy))
    return getSizeOfScalableVectorExpr(IntTy, ScalableStoreTy);
  // We can bypass creating a target-independent constant expression and then
  // folding it back into a ConstantInt. This is just a compile-time
  // optimization.
  return getConstant(IntTy, getDataLayout().getTypeStoreSize(StoreTy));
}

const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
                                             StructType *STy,
                                             unsigned FieldNo) {
  // We can bypass creating a target-independent constant expression and then
  // folding it back into a ConstantInt. This is just a compile-time
  // optimization.
  return getConstant(
      IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
}

const SCEV *ScalarEvolution::getUnknown(Value *V) {
  // Don't attempt to do anything other than create a SCEVUnknown object
  // here. createSCEV only calls getUnknown after checking for all other
  // interesting possibilities, and any other code that calls getUnknown
  // is doing so in order to hide a value from SCEV canonicalization.
  FoldingSetNodeID ID;
  ID.AddInteger(scUnknown);
  ID.AddPointer(V);
  void *IP = nullptr;
  if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
    assert(cast<SCEVUnknown>(S)->getValue() == V &&
           "Stale SCEVUnknown in uniquing map!");
    return S;
  }
  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
                                            FirstUnknown);
  FirstUnknown = cast<SCEVUnknown>(S);
  UniqueSCEVs.InsertNode(S, IP);
  return S;
}

//===----------------------------------------------------------------------===//
//             Basic SCEV Analysis and PHI Idiom Recognition Code
//

/// Test if values of the given type are analyzable within the SCEV
/// framework. This primarily includes integer types, and it can optionally
/// include pointer types if the ScalarEvolution class has access to
/// target-specific information.
bool ScalarEvolution::isSCEVable(Type *Ty) const {
  // Integers and pointers are always SCEVable.
  return Ty->isIntOrPtrTy();
}

/// Return the size in bits of the specified type, for which isSCEVable must
/// return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
  assert(isSCEVable(Ty) && "Type is not SCEVable!");
  if (Ty->isPointerTy())
    return getDataLayout().getIndexTypeSizeInBits(Ty);
  return getDataLayout().getTypeSizeInBits(Ty);
}

/// Return a type with the same bitwidth as the given type and which represents
/// how SCEV will treat the given type, for which isSCEVable must return
/// true. For pointer types, this is the pointer index sized integer type.
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
  assert(isSCEVable(Ty) && "Type is not SCEVable!");
  if (Ty->isIntegerTy())
    return Ty;
  // The only other supported type is pointer.
  assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
  return getDataLayout().getIndexType(Ty);
}

Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
  return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
}

bool ScalarEvolution::instructionCouldExistWitthOperands(const SCEV *A,
                                                         const SCEV *B) {
  /// For a valid use point to exist, the defining scope of one operand
  /// must dominate the other.
  bool PreciseA, PreciseB;
  auto *ScopeA = getDefiningScopeBound({A}, PreciseA);
  auto *ScopeB = getDefiningScopeBound({B}, PreciseB);
  if (!PreciseA || !PreciseB)
    // Can't tell.
    return false;
  return (ScopeA == ScopeB) || DT.dominates(ScopeA, ScopeB) ||
         DT.dominates(ScopeB, ScopeA);
}

const SCEV *ScalarEvolution::getCouldNotCompute() {
  return CouldNotCompute.get();
}

bool ScalarEvolution::checkValidity(const SCEV *S) const {
  bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) {
    auto *SU = dyn_cast<SCEVUnknown>(S);
    return SU && SU->getValue() == nullptr;
  });
  return !ContainsNulls;
}

bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
  HasRecMapType::iterator I = HasRecMap.find(S);
  if (I != HasRecMap.end())
    return I->second;

  bool FoundAddRec =
      SCEVExprContains(S, [](const SCEV *S) { return isa<SCEVAddRecExpr>(S); });
  HasRecMap.insert({S, FoundAddRec});
  return FoundAddRec;
}

/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
/// offset I, then return {S', I}, else return {\p S, nullptr}.
static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
  const auto *Add = dyn_cast<SCEVAddExpr>(S);
  if (!Add)
    return {S, nullptr};
  if (Add->getNumOperands() != 2)
    return {S, nullptr};
  auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
  if (!ConstOp)
    return {S, nullptr};
  return {Add->getOperand(1), ConstOp->getValue()};
}
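
// Worked example (informal): splitAddExpr only recognises the two-operand
// constant-plus-something shape, so, schematically:
//
//   (5 + %x)       -->  { %x, 5 }
//   (%x + %y)      -->  { (%x + %y), nullptr }       (no constant operand)
//   (1 + %x + %y)  -->  { (1 + %x + %y), nullptr }   (more than two operands)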
/// Return the ValueOffsetPair set for \p S. \p S can be represented
/// by the value and offset from any ValueOffsetPair in the set.
ScalarEvolution::ValueOffsetPairSetVector *
ScalarEvolution::getSCEVValues(const SCEV *S) {
  ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
  if (SI == ExprValueMap.end())
    return nullptr;
#ifndef NDEBUG
  if (VerifySCEVMap) {
    // Check there is no dangling Value in the set returned.
    for (const auto &VE : SI->second)
      assert(ValueExprMap.count(VE.first));
  }
#endif
  return &SI->second;
}

/// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V)
/// cannot be used separately. eraseValueFromMap should be used to remove
/// V from ValueExprMap and ExprValueMap at the same time.
void ScalarEvolution::eraseValueFromMap(Value *V) {
  ValueExprMapType::iterator I = ValueExprMap.find_as(V);
  if (I != ValueExprMap.end()) {
    const SCEV *S = I->second;
    // Remove {V, 0} from the set of ExprValueMap[S]
    if (auto *SV = getSCEVValues(S))
      SV->remove({V, nullptr});

    // Remove {V, Offset} from the set of ExprValueMap[Stripped]
    const SCEV *Stripped;
    ConstantInt *Offset;
    std::tie(Stripped, Offset) = splitAddExpr(S);
    if (Offset != nullptr) {
      if (auto *SV = getSCEVValues(Stripped))
        SV->remove({V, Offset});
    }
    ValueExprMap.erase(V);
  }
}

void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
  // A recursive query may have already computed the SCEV. It should be
  // equivalent, but may not necessarily be exactly the same, e.g. due to lazily
  // inferred nowrap flags.
  auto It = ValueExprMap.find_as(V);
  if (It == ValueExprMap.end()) {
    ValueExprMap.insert({SCEVCallbackVH(V, this), S});
    ExprValueMap[S].insert({V, nullptr});
  }
}

/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
  assert(isSCEVable(V->getType()) && "Value is not SCEVable!");

  const SCEV *S = getExistingSCEV(V);
  if (S == nullptr) {
    S = createSCEV(V);
    // During PHI resolution, it is possible to create two SCEVs for the same
    // V, so we need to double-check whether V->S is inserted into
    // ValueExprMap before inserting S->{V, 0} into ExprValueMap.
    std::pair<ValueExprMapType::iterator, bool> Pair =
        ValueExprMap.insert({SCEVCallbackVH(V, this), S});
    if (Pair.second) {
      ExprValueMap[S].insert({V, nullptr});

      // If S == Stripped + Offset, add Stripped -> {V, Offset} into
      // ExprValueMap.
      const SCEV *Stripped = S;
      ConstantInt *Offset = nullptr;
      std::tie(Stripped, Offset) = splitAddExpr(S);
      // If Stripped is a SCEVUnknown, don't bother to save
      // Stripped -> {V, Offset}. It doesn't simplify and sometimes even
      // increases the complexity of the expansion code.
      // If V is a GetElementPtrInst, don't save Stripped -> {V, Offset}
      // because it may generate add/sub instead of GEP in SCEV expansion.
      if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
          !isa<GetElementPtrInst>(V))
        ExprValueMap[Stripped].insert({V, Offset});
    }
  }
  return S;
}

const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
  assert(isSCEVable(V->getType()) && "Value is not SCEVable!");

  ValueExprMapType::iterator I = ValueExprMap.find_as(V);
  if (I != ValueExprMap.end()) {
    const SCEV *S = I->second;
    assert(checkValidity(S) &&
           "existing SCEV has not been properly invalidated");
    return S;
  }
  return nullptr;
}

/// Return a SCEV corresponding to -V = -1*V
const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
                                             SCEV::NoWrapFlags Flags) {
  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
    return getConstant(
        cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));

  Type *Ty = V->getType();
  Ty = getEffectiveSCEVType(Ty);
  return getMulExpr(V, getMinusOne(Ty), Flags);
}

/// If Expr computes ~A, return A else return nullptr
static const SCEV *MatchNotExpr(const SCEV *Expr) {
  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
  if (!Add || Add->getNumOperands() != 2 ||
      !Add->getOperand(0)->isAllOnesValue())
    return nullptr;

  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
      !AddRHS->getOperand(0)->isAllOnesValue())
    return nullptr;

  return AddRHS->getOperand(1);
}

/// Return a SCEV corresponding to ~V = -1-V
const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
  assert(!V->getType()->isPointerTy() && "Can't negate pointer");

  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
    return getConstant(
        cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));

  // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y)
  if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) {
    auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) {
      SmallVector<const SCEV *, 2> MatchedOperands;
      for (const SCEV *Operand : MME->operands()) {
        const SCEV *Matched = MatchNotExpr(Operand);
        if (!Matched)
          return (const SCEV *)nullptr;
        MatchedOperands.push_back(Matched);
      }
      return getMinMaxExpr(SCEVMinMaxExpr::negate(MME->getSCEVType()),
                           MatchedOperands);
    };
    if (const SCEV *Replaced = MatchMinMaxNegation(MME))
      return Replaced;
  }

  Type *Ty = V->getType();
  Ty = getEffectiveSCEVType(Ty);
  return getMinusSCEV(getMinusOne(Ty), V);
}
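
// Worked example (informal): SCEV has no dedicated "not" node, so ~%a is
// represented as (-1 + (-1 * %a)), which is exactly the shape MatchNotExpr
// recognises. The min/max fold above therefore rewrites, e.g.,
//
//   getNotSCEV(smax((-1 + (-1 * %a)), (-1 + (-1 * %b))))
//
// i.e. ~smax(~%a, ~%b), into smin(%a, %b), using SCEVMinMaxExpr::negate to
// swap smax<->smin and umax<->umin.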
const SCEV *ScalarEvolution::removePointerBase(const SCEV *P) {
  assert(P->getType()->isPointerTy());

  if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(P)) {
    // The base of an AddRec is the first operand.
    SmallVector<const SCEV *> Ops{AddRec->operands()};
    Ops[0] = removePointerBase(Ops[0]);
    // Don't try to transfer nowrap flags for now. We could in some cases
    // (for example, if the pointer operand of the AddRec is a SCEVUnknown).
    return getAddRecExpr(Ops, AddRec->getLoop(), SCEV::FlagAnyWrap);
  }
  if (auto *Add = dyn_cast<SCEVAddExpr>(P)) {
    // The base of an Add is the pointer operand.
    SmallVector<const SCEV *> Ops{Add->operands()};
    const SCEV **PtrOp = nullptr;
    for (const SCEV *&AddOp : Ops) {
      if (AddOp->getType()->isPointerTy()) {
        assert(!PtrOp && "Cannot have multiple pointer ops");
        PtrOp = &AddOp;
      }
    }
    *PtrOp = removePointerBase(*PtrOp);
    // Don't try to transfer nowrap flags for now. We could in some cases
    // (for example, if the pointer operand of the Add is a SCEVUnknown).
    return getAddExpr(Ops);
  }
  // Any other expression must be a pointer base.
  return getZero(P->getType());
}

const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
                                          SCEV::NoWrapFlags Flags,
                                          unsigned Depth) {
  // Fast path: X - X --> 0.
  if (LHS == RHS)
    return getZero(LHS->getType());

  // If we subtract two pointers with different pointer bases, bail.
  // Eventually, we're going to add an assertion to getMulExpr that we
  // can't multiply by a pointer.
  if (RHS->getType()->isPointerTy()) {
    if (!LHS->getType()->isPointerTy() ||
        getPointerBase(LHS) != getPointerBase(RHS))
      return getCouldNotCompute();
    LHS = removePointerBase(LHS);
    RHS = removePointerBase(RHS);
  }

  // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
  // makes it so that we cannot make much use of NUW.
  auto AddFlags = SCEV::FlagAnyWrap;
  const bool RHSIsNotMinSigned =
      !getSignedRangeMin(RHS).isMinSignedValue();
  if (hasFlags(Flags, SCEV::FlagNSW)) {
    // Let M be the minimum representable signed value. Then (-1)*RHS
    // signed-wraps if and only if RHS is M. That can happen even for
    // a NSW subtraction because e.g. (-1)*M signed-wraps even though
    // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
    // (-1)*RHS, we need to prove that RHS != M.
    //
    // If LHS is non-negative and we know that LHS - RHS does not
    // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
    // either by proving that RHS > M or that LHS >= 0.
    if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
      AddFlags = SCEV::FlagNSW;
    }
  }

  // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
  // RHS is NSW and LHS >= 0.
  //
  // The difficulty here is that the NSW flag may have been proven
  // relative to a loop that is to be found in a recurrence in LHS and
  // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
  // larger scope than intended.
  auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;

  return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth);
}
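
// Worked example (informal), for i8 where M = -128: the subtraction
// (-1) - (-128) = 127 does not signed-wrap, yet the rewritten form
// (-1) + (-1)*(-128) contains the product (-1)*(-128) = 128, which does wrap.
// That is why NSW is transferred to the add only when RHS is provably not M
// (or LHS is known non-negative), and why the negation itself is only marked
// NSW when RHSIsNotMinSigned holds.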
const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
                                                     unsigned Depth) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot truncate or zero extend with non-integer arguments!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V; // No conversion
  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
    return getTruncateExpr(V, Ty, Depth);
  return getZeroExtendExpr(V, Ty, Depth);
}

const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty,
                                                     unsigned Depth) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot truncate or sign extend with non-integer arguments!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V; // No conversion
  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
    return getTruncateExpr(V, Ty, Depth);
  return getSignExtendExpr(V, Ty, Depth);
}

const SCEV *
ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot noop or zero extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrZeroExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V; // No conversion
  return getZeroExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot noop or sign extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrSignExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V; // No conversion
  return getSignExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot noop or any extend with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
         "getNoopOrAnyExtend cannot truncate!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V; // No conversion
  return getAnyExtendExpr(V, Ty);
}

const SCEV *
ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
  Type *SrcTy = V->getType();
  assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
         "Cannot truncate or noop with non-integer arguments!");
  assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
         "getTruncateOrNoop cannot extend!");
  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
    return V; // No conversion
  return getTruncateExpr(V, Ty);
}

const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
                                                        const SCEV *RHS) {
  const SCEV *PromotedLHS = LHS;
  const SCEV *PromotedRHS = RHS;

  if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
    PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
  else
    PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());

  return getUMaxExpr(PromotedLHS, PromotedRHS);
}

const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
                                                        const SCEV *RHS,
                                                        bool Sequential) {
  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
  return getUMinFromMismatchedTypes(Ops, Sequential);
}

const SCEV *
ScalarEvolution::getUMinFromMismatchedTypes(SmallVectorImpl<const SCEV *> &Ops,
                                            bool Sequential) {
  assert(!Ops.empty() && "At least one operand must be!");
  // Trivial case.
  if (Ops.size() == 1)
    return Ops[0];

  // Find the max type first.
  Type *MaxType = nullptr;
  for (auto *S : Ops)
    if (MaxType)
      MaxType = getWiderType(MaxType, S->getType());
    else
      MaxType = S->getType();
  assert(MaxType && "Failed to find maximum type!");

  // Extend all ops to max type.
  SmallVector<const SCEV *, 2> PromotedOps;
  for (auto *S : Ops)
    PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType));

  // Generate umin.
  return getUMinExpr(PromotedOps, Sequential);
}
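
// Worked example (informal): for operands of mixed widths, say %a of type i32
// and %b of type i64, the helper above widens everything to the widest type
// before forming the minimum, producing roughly
//
//   umin((zext i32 %a to i64), %b)
//
// (or the umin_seq form when Sequential is set), so callers never need to
// pre-promote their operands.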
const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
  // A pointer operand may evaluate to a nonpointer expression, such as null.
  if (!V->getType()->isPointerTy())
    return V;

  while (true) {
    if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
      V = AddRec->getStart();
    } else if (auto *Add = dyn_cast<SCEVAddExpr>(V)) {
      const SCEV *PtrOp = nullptr;
      for (const SCEV *AddOp : Add->operands()) {
        if (AddOp->getType()->isPointerTy()) {
          assert(!PtrOp && "Cannot have multiple pointer ops");
          PtrOp = AddOp;
        }
      }
      assert(PtrOp && "Must have pointer op");
      V = PtrOp;
    } else // Not something we can look further into.
      return V;
  }
}
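
// Worked example (informal): getPointerBase peels adds and addrecs until only
// the bare pointer remains. For the pointer-typed SCEV
//
//   {(8 + %p),+,4}<%loop>
//
// it first takes the addrec start (8 + %p) and then the pointer operand of
// that add, returning %p. removePointerBase above is the complementary
// operation and would rewrite the same expression as {8,+,4}<%loop>.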
/// Push users of the given Instruction onto the given Worklist.
static void PushDefUseChildren(Instruction *I,
                               SmallVectorImpl<Instruction *> &Worklist,
                               SmallPtrSetImpl<Instruction *> &Visited) {
  // Push the def-use children onto the Worklist stack.
  for (User *U : I->users()) {
    auto *UserInsn = cast<Instruction>(U);
    if (Visited.insert(UserInsn).second)
      Worklist.push_back(UserInsn);
  }
}

namespace {

/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its start
/// expression if its loop is L. If the loop is not L, then use the AddRec
/// itself when IgnoreOtherLoops is true; otherwise the rewrite cannot be done.
/// If the SCEV contains a loop-variant unknown SCEV, the rewrite cannot be
/// done either.
class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                             bool IgnoreOtherLoops = true) {
    SCEVInitRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    if (Rewriter.hasSeenLoopVariantSCEVUnknown())
      return SE.getCouldNotCompute();
    return Rewriter.hasSeenOtherLoops() && !IgnoreOtherLoops
               ? SE.getCouldNotCompute()
               : Result;
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    if (!SE.isLoopInvariant(Expr, L))
      SeenLoopVariantSCEVUnknown = true;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    // Only re-write AddRecExprs for this loop.
    if (Expr->getLoop() == L)
      return Expr->getStart();
    SeenOtherLoops = true;
    return Expr;
  }

  bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }

  bool hasSeenOtherLoops() { return SeenOtherLoops; }

private:
  explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool SeenLoopVariantSCEVUnknown = false;
  bool SeenOtherLoops = false;
};
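
// Worked example (informal): for S = (4 + {%start,+,1}<%L>) and loop %L,
// SCEVInitRewriter::rewrite returns (4 + %start), i.e. the value of S on entry
// to %L. An addrec for a different loop is either kept or rejected depending
// on IgnoreOtherLoops, and a loop-variant SCEVUnknown makes the whole rewrite
// bail out with SCEVCouldNotCompute.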
/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its post
/// increment expression if its loop is L; if the loop is not L, use the
/// AddRec itself. If the SCEV contains a loop-variant unknown SCEV, the
/// rewrite cannot be done.
class SCEVPostIncRewriter : public SCEVRewriteVisitor<SCEVPostIncRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE) {
    SCEVPostIncRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    return Rewriter.hasSeenLoopVariantSCEVUnknown()
               ? SE.getCouldNotCompute()
               : Result;
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    if (!SE.isLoopInvariant(Expr, L))
      SeenLoopVariantSCEVUnknown = true;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    // Only re-write AddRecExprs for this loop.
    if (Expr->getLoop() == L)
      return Expr->getPostIncExpr(SE);
    SeenOtherLoops = true;
    return Expr;
  }

  bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }

  bool hasSeenOtherLoops() { return SeenOtherLoops; }

private:
  explicit SCEVPostIncRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool SeenLoopVariantSCEVUnknown = false;
  bool SeenOtherLoops = false;
};

/// This class evaluates the compare condition by matching it against the
/// condition of the loop latch. If there is a match, we assume a true value
/// for the condition while building SCEV nodes.
class SCEVBackedgeConditionFolder
    : public SCEVRewriteVisitor<SCEVBackedgeConditionFolder> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
    bool IsPosBECond = false;
    Value *BECond = nullptr;
    if (BasicBlock *Latch = L->getLoopLatch()) {
      BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
      if (BI && BI->isConditional()) {
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "Both outgoing branches should not target same header!");
        BECond = BI->getCondition();
        IsPosBECond = BI->getSuccessor(0) == L->getHeader();
      } else {
        return S;
      }
    }
    SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE);
    return Rewriter.visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    const SCEV *Result = Expr;
    bool InvariantF = SE.isLoopInvariant(Expr, L);

    if (!InvariantF) {
      Instruction *I = cast<Instruction>(Expr->getValue());
      switch (I->getOpcode()) {
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(I);
        Optional<const SCEV *> Res =
            compareWithBackedgeCondition(SI->getCondition());
        if (Res.hasValue()) {
          bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne();
          Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue());
        }
        break;
      }
      default: {
        Optional<const SCEV *> Res = compareWithBackedgeCondition(I);
        if (Res.hasValue())
          Result = Res.getValue();
        break;
      }
      }
    }
    return Result;
  }

private:
  explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond,
                                       bool IsPosBECond, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L), BackedgeCond(BECond),
        IsPositiveBECond(IsPosBECond) {}

  Optional<const SCEV *> compareWithBackedgeCondition(Value *IC);

  const Loop *L;
  /// Loop back condition.
  Value *BackedgeCond = nullptr;
  /// Set to true if loop back is on positive branch condition.
  bool IsPositiveBECond;
};

Optional<const SCEV *>
SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) {
  // If value matches the backedge condition for loop latch,
  // then return a constant evolution node based on loopback
  // branch taken.
  if (BackedgeCond == IC)
    return IsPositiveBECond ? SE.getOne(Type::getInt1Ty(SE.getContext()))
                            : SE.getZero(Type::getInt1Ty(SE.getContext()));
  return None;
}
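
// Worked example (informal): assume a loop whose latch ends in
//
//   br i1 %cond, label %header, label %exit
//
// While rewriting, the folder treats %cond as true, so a loop-variant
//
//   %v = select i1 %cond, i64 %a, i64 %b
//
// is rewritten to the SCEV of %a, and a use of %cond itself folds to the
// constant 1 (it would fold to 0 had the false edge been the backedge).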
class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
    SCEVShiftRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    // Only allow AddRecExprs for this loop.
    if (!SE.isLoopInvariant(Expr, L))
      Valid = false;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    if (Expr->getLoop() == L && Expr->isAffine())
      return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
    Valid = false;
    return Expr;
  }

  bool isValid() { return Valid; }

private:
  explicit SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool Valid = true;
};

} // end anonymous namespace

SCEV::NoWrapFlags
ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
  if (!AR->isAffine())
    return SCEV::FlagAnyWrap;

  using OBO = OverflowingBinaryOperator;

  SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;

  if (!AR->hasNoSignedWrap()) {
    ConstantRange AddRecRange = getSignedRange(AR);
    ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this));

    auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, IncRange, OBO::NoSignedWrap);
    if (NSWRegion.contains(AddRecRange))
      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW);
  }

  if (!AR->hasNoUnsignedWrap()) {
    ConstantRange AddRecRange = getUnsignedRange(AR);
    ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this));

    auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, IncRange, OBO::NoUnsignedWrap);
    if (NUWRegion.contains(AddRecRange))
      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW);
  }

  return Result;
}
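
// Worked example (informal, made-up numbers): suppose an i8 addrec whose
// signed range is known to be [0, 100) and whose step has signed range [1, 3).
// The guaranteed no-signed-wrap region for an "add" with that increment
// excludes only values close to INT8_MAX, so [0, 100) is contained in it and
// FlagNSW can be set from range information alone, without consulting any
// loop guards.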
SCEV::NoWrapFlags
ScalarEvolution::proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR) {
  SCEV::NoWrapFlags Result = AR->getNoWrapFlags();

  if (AR->hasNoSignedWrap())
    return Result;

  if (!AR->isAffine())
    return Result;

  const SCEV *Step = AR->getStepRecurrence(*this);
  const Loop *L = AR->getLoop();

  // Check whether the backedge-taken count is SCEVCouldNotCompute.
  // Note that this serves two purposes: It filters out loops that are
  // simply not analyzable, and it covers the case where this code is
  // being called from within backedge-taken count analysis, such that
  // attempting to ask for the backedge-taken count would likely result
  // in infinite recursion. In the latter case, the analysis code will
  // cope with a conservative value, and it will take care to purge
  // that value once it has finished.
  const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);

  // Normally, in the cases we can prove no-overflow via a
  // backedge guarding condition, we can also compute a backedge
  // taken count for the loop. The exceptions are assumptions and
  // guards present in the loop -- SCEV is not great at exploiting
  // these to compute max backedge taken counts, but can still use
  // these to prove lack of overflow. Use this fact to avoid
  // doing extra work that may not pay off.

  if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
      AC.assumptions().empty())
    return Result;

  // If the backedge is guarded by a comparison with the pre-inc value the
  // addrec is safe. Also, if the entry is guarded by a comparison with the
  // start value and the backedge is guarded by a comparison with the post-inc
  // value, the addrec is safe.
  ICmpInst::Predicate Pred;
  const SCEV *OverflowLimit =
      getSignedOverflowLimitForStep(Step, &Pred, this);
  if (OverflowLimit &&
      (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
       isKnownOnEveryIteration(Pred, AR, OverflowLimit))) {
    Result = setFlags(Result, SCEV::FlagNSW);
  }
  return Result;
}

SCEV::NoWrapFlags
ScalarEvolution::proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR) {
  SCEV::NoWrapFlags Result = AR->getNoWrapFlags();

  if (AR->hasNoUnsignedWrap())
    return Result;

  if (!AR->isAffine())
    return Result;

  const SCEV *Step = AR->getStepRecurrence(*this);
  unsigned BitWidth = getTypeSizeInBits(AR->getType());
  const Loop *L = AR->getLoop();

  // Check whether the backedge-taken count is SCEVCouldNotCompute.
  // Note that this serves two purposes: It filters out loops that are
  // simply not analyzable, and it covers the case where this code is
  // being called from within backedge-taken count analysis, such that
  // attempting to ask for the backedge-taken count would likely result
  // in infinite recursion. In the latter case, the analysis code will
  // cope with a conservative value, and it will take care to purge
  // that value once it has finished.
  const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);

  // Normally, in the cases we can prove no-overflow via a
  // backedge guarding condition, we can also compute a backedge
  // taken count for the loop. The exceptions are assumptions and
  // guards present in the loop -- SCEV is not great at exploiting
  // these to compute max backedge taken counts, but can still use
  // these to prove lack of overflow. Use this fact to avoid
  // doing extra work that may not pay off.

  if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
      AC.assumptions().empty())
    return Result;

  // If the backedge is guarded by a comparison with the pre-inc value the
  // addrec is safe. Also, if the entry is guarded by a comparison with the
  // start value and the backedge is guarded by a comparison with the post-inc
  // value, the addrec is safe.
  if (isKnownPositive(Step)) {
    const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
                                getUnsignedRangeMax(Step));
    if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
        isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) {
      Result = setFlags(Result, SCEV::FlagNUW);
    }
  }

  return Result;
}
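
// Worked example (informal): with i8 arithmetic and a step whose unsigned
// maximum is 4, the limit computed above is N = 0 - 4 = 252 (mod 2^8). If the
// backedge is guarded by "AR <u 252" (or that comparison holds on every
// iteration), then AR + Step <= 251 + 4 = 255 never wraps past 2^8 - 1, and
// FlagNUW may be added.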
namespace {

/// Represents an abstract binary operation. This may exist as a
/// normal instruction or constant expression, or may have been
/// derived from an expression tree.
struct BinaryOp {
  unsigned Opcode;
  Value *LHS;
  Value *RHS;
  bool IsNSW = false;
  bool IsNUW = false;

  /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
  /// constant expression.
  Operator *Op = nullptr;

  explicit BinaryOp(Operator *Op)
      : Opcode(Op->getOpcode()), LHS(Op->getOperand(0)), RHS(Op->getOperand(1)),
        Op(Op) {
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
      IsNSW = OBO->hasNoSignedWrap();
      IsNUW = OBO->hasNoUnsignedWrap();
    }
  }

  explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false,
                    bool IsNUW = false)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
};

} // end anonymous namespace

/// Try to map \p V into a BinaryOp, and return \c None on failure.
static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
  auto *Op = dyn_cast<Operator>(V);
  if (!Op)
    return None;

  // Implementation detail: all the cleverness here should happen without
  // creating new SCEV expressions -- our caller knows tricks to avoid creating
  // SCEV expressions when possible, and we should not break that.

  switch (Op->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::URem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::AShr:
  case Instruction::Shl:
    return BinaryOp(Op);

  case Instruction::Xor:
    if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
      // If the RHS of the xor is a signmask, then this is just an add.
      // Instcombine turns add of signmask into xor as a strength reduction step.
      if (RHSC->getValue().isSignMask())
        return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
    return BinaryOp(Op);

  case Instruction::LShr:
    // Turn logical shift right of a constant into an unsigned divide.
    if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
      uint32_t BitWidth = cast<IntegerType>(Op->getType())->getBitWidth();

      // If the shift count is not less than the bitwidth, the result of
      // the shift is undefined. Don't try to analyze it, because the
      // resolution chosen here may differ from the resolution chosen in
      // other parts of the compiler.
      if (SA->getValue().ult(BitWidth)) {
        Constant *X =
            ConstantInt::get(SA->getContext(),
                             APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
        return BinaryOp(Instruction::UDiv, Op->getOperand(0), X);
      }
    }
    return BinaryOp(Op);

  case Instruction::ExtractValue: {
    auto *EVI = cast<ExtractValueInst>(Op);
    if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
      break;

    auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand());
    if (!WO)
      break;

    Instruction::BinaryOps BinOp = WO->getBinaryOp();
    bool Signed = WO->isSigned();
    // TODO: Should add nuw/nsw flags for mul as well.
    if (BinOp == Instruction::Mul || !isOverflowIntrinsicNoWrap(WO, DT))
      return BinaryOp(BinOp, WO->getLHS(), WO->getRHS());

    // Now that we know that all uses of the arithmetic-result component of
    // CI are guarded by the overflow check, we can go ahead and pretend
    // that the arithmetic is non-overflowing.
    return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(),
                    /* IsNSW = */ Signed, /* IsNUW = */ !Signed);
  }

  default:
    break;
  }

  // Recognise intrinsic loop.decrement.reg, and as this has exactly the same
  // semantics as a Sub, return a binary sub expression.
  if (auto *II = dyn_cast<IntrinsicInst>(V))
    if (II->getIntrinsicID() == Intrinsic::loop_decrement_reg)
      return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1));

  return None;
}
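
// Worked example (informal): a few of the mappings performed above, assuming
// i32 values:
//
//   xor %x, -2147483648  -->  BinaryOp(Add,  %x, -2147483648)  (sign-mask xor)
//   lshr %x, 3           -->  BinaryOp(UDiv, %x, 8)            (in-range shift)
//   extractvalue 0 of (uadd.with.overflow %a, %b)
//                        -->  BinaryOp(Add, %a, %b, /*IsNSW=*/false,
//                                      /*IsNUW=*/true), provided every use of
//                             the result is guarded by the overflow bit.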
/// Helper function to createAddRecFromPHIWithCasts. We have a phi
/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
/// follows one of the following patterns:
/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// If the SCEV expression of \p Op conforms with one of the expected patterns
/// we return the type of the truncation operation, and indicate whether the
/// truncated type should be treated as signed/unsigned by setting
/// \p Signed to true/false, respectively.
static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
                               bool &Signed, ScalarEvolution &SE) {
  // The case where Op == SymbolicPHI (that is, with no type conversions on
  // the way) is handled by the regular add recurrence creating logic and
  // would have already been triggered in createAddRecForPHI. Reaching it here
  // means that createAddRecFromPHI had failed for this PHI before (e.g.,
  // because one of the other operands of the SCEVAddExpr updating this PHI is
  // not invariant).
  //
  // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
  // this case predicates that allow us to prove that Op == SymbolicPHI will
  // be added.
  if (Op == SymbolicPHI)
    return nullptr;

  unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType());
  unsigned NewBits = SE.getTypeSizeInBits(Op->getType());
  if (SourceBits != NewBits)
    return nullptr;

  const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
  const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
  if (!SExt && !ZExt)
    return nullptr;
  const SCEVTruncateExpr *Trunc =
      SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
           : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
  if (!Trunc)
    return nullptr;
  const SCEV *X = Trunc->getOperand();
  if (X != SymbolicPHI)
    return nullptr;
  Signed = SExt != nullptr;
  return Trunc->getType();
}
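
// Worked example (informal): for a 64-bit %SymbolicPHI, the operand
//
//   Op = (sext i32 (trunc i64 %SymbolicPHI to i32) to i64)
//
// matches the pattern above, so the function returns the truncation type i32
// and sets Signed = true; the zext variant likewise returns i32 with
// Signed = false. Any other shape (mismatched widths, no truncation, or a
// different value under the truncation) yields nullptr.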
static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
  if (!PN->getType()->isIntegerTy())
    return nullptr;
  const Loop *L = LI.getLoopFor(PN->getParent());
  if (!L || L->getHeader() != PN->getParent())
    return nullptr;
  return L;
}

// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
// computation that updates the phi follows the following pattern:
//   (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
// which corresponds to a phi->trunc->sext/zext->add->phi update chain.
// If so, try to see if it can be rewritten as an AddRecExpr under some
// Predicates. If successful, return them as a pair. Also cache the results
// of the analysis.
//
// Example usage scenario:
//    Say the Rewriter is called for the following SCEV:
//         8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
//    where:
//         %X = phi i64 (%Start, %BEValue)
//    It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X),
//    and call this function with %SymbolicPHI = %X.
//
//    The analysis will find that the value coming around the backedge has
//    the following SCEV:
//         BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
//    Upon concluding that this matches the desired pattern, the function
//    will return the pair {NewAddRec, SmallPredsVec} where:
//         NewAddRec = {%Start,+,%Step}
//         SmallPredsVec = {P1, P2, P3} as follows:
//           P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)}<nsw> Flags: <nssw>
//           P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64)
//           P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64)
//    The returned pair means that SymbolicPHI can be rewritten into NewAddRec
//    under the predicates {P1,P2,P3}.
//    This predicated rewrite will be cached in PredicatedSCEVRewrites:
//         PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3}}
//
// TODO's:
//
// 1) Extend the Induction descriptor to also support inductions that involve
//    casts: When needed (namely, when we are called in the context of the
//    vectorizer induction analysis), a Set of cast instructions will be
//    populated by this method, and provided back to isInductionPHI. This is
//    needed to allow the vectorizer to properly record them to be ignored by
//    the cost model and to avoid vectorizing them (otherwise these casts,
//    which are redundant under the runtime overflow checks, will be
//    vectorized, which can be costly).
//
// 2) Support additional induction/PHISCEV patterns: We also want to support
//    inductions where the sext-trunc / zext-trunc operations (partly) occur
//    after the induction update operation (the induction increment):
//
//      (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
//    which correspond to a phi->add->trunc->sext/zext->phi update chain.
//
//      (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix)
//    which correspond to a phi->trunc->add->sext/zext->phi update chain.
//
// 3) Outline common code with createAddRecFromPHI to avoid duplication.
  4639. Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
  4640. ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) {
  4641. SmallVector<const SCEVPredicate *, 3> Predicates;
  4642. // *** Part1: Analyze if we have a phi-with-cast pattern for which we can
  4643. // return an AddRec expression under some predicate.
  4644. auto *PN = cast<PHINode>(SymbolicPHI->getValue());
  4645. const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
  4646. assert(L && "Expecting an integer loop header phi");
  4647. // The loop may have multiple entrances or multiple exits; we can analyze
  4648. // this phi as an addrec if it has a unique entry value and a unique
  4649. // backedge value.
  4650. Value *BEValueV = nullptr, *StartValueV = nullptr;
  4651. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
  4652. Value *V = PN->getIncomingValue(i);
  4653. if (L->contains(PN->getIncomingBlock(i))) {
  4654. if (!BEValueV) {
  4655. BEValueV = V;
  4656. } else if (BEValueV != V) {
  4657. BEValueV = nullptr;
  4658. break;
  4659. }
  4660. } else if (!StartValueV) {
  4661. StartValueV = V;
  4662. } else if (StartValueV != V) {
  4663. StartValueV = nullptr;
  4664. break;
  4665. }
  4666. }
  4667. if (!BEValueV || !StartValueV)
  4668. return None;
  4669. const SCEV *BEValue = getSCEV(BEValueV);
  4670. // If the value coming around the backedge is an add with the symbolic
  4671. // value we just inserted, possibly with casts that we can ignore under
  4672. // an appropriate runtime guard, then we found a simple induction variable!
  4673. const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
  4674. if (!Add)
  4675. return None;
  4676. // If there is a single occurrence of the symbolic value, possibly
  4677. // casted, replace it with a recurrence.
  4678. unsigned FoundIndex = Add->getNumOperands();
  4679. Type *TruncTy = nullptr;
  4680. bool Signed;
  4681. for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
  4682. if ((TruncTy =
  4683. isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this)))
  4684. if (FoundIndex == e) {
  4685. FoundIndex = i;
  4686. break;
  4687. }
  4688. if (FoundIndex == Add->getNumOperands())
  4689. return None;
  4690. // Create an add with everything but the specified operand.
  4691. SmallVector<const SCEV *, 8> Ops;
  4692. for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
  4693. if (i != FoundIndex)
  4694. Ops.push_back(Add->getOperand(i));
  4695. const SCEV *Accum = getAddExpr(Ops);
  4696. // The runtime checks will not be valid if the step amount is
  4697. // varying inside the loop.
  4698. if (!isLoopInvariant(Accum, L))
  4699. return None;
  4700. // *** Part2: Create the predicates
  4701. // Analysis was successful: we have a phi-with-cast pattern for which we
  4702. // can return an AddRec expression under the following predicates:
  4703. //
  4704. // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
  4705. // fits within the truncated type (does not overflow) for i = 0 to n-1.
  4706. // P2: An Equal predicate that guarantees that
  4707. // Start = (Ext ix (Trunc iy (Start) to ix) to iy)
  4708. // P3: An Equal predicate that guarantees that
  4709. // Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
  4710. //
  4711. // As we next prove, the above predicates guarantee that:
  4712. // Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
  4713. //
  4714. //
  4715. // More formally, we want to prove that:
  4716. // Expr(i+1) = Start + (i+1) * Accum
  4717. // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  4718. //
  4719. // Given that:
  4720. // 1) Expr(0) = Start
  4721. // 2) Expr(1) = Start + Accum
  4722. // = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
  4723. // 3) Induction hypothesis (step i):
  4724. // Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
  4725. //
  4726. // Proof:
  4727. // Expr(i+1) =
  4728. // = Start + (i+1)*Accum
  4729. // = (Start + i*Accum) + Accum
  4730. // = Expr(i) + Accum
  4731. // = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
  4732. // :: from step i
  4733. //
  4734. // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
  4735. //
  4736. // = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
  4737. // + (Ext ix (Trunc iy (Accum) to ix) to iy)
  4738. // + Accum :: from P3
  4739. //
  4740. // = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
  4741. // + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
  4742. //
  4743. // = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
  4744. // = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  4745. //
  4746. // By induction, the same applies to all iterations 1<=i<n:
  4747. //
  4748. // Create a truncated addrec for which we will add a no overflow check (P1).
  4749. const SCEV *StartVal = getSCEV(StartValueV);
  4750. const SCEV *PHISCEV =
  4751. getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
  4752. getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);
  4753. // PHISCEV can be either a SCEVConstant or a SCEVAddRecExpr.
  4754. // ex: If truncated Accum is 0 and StartVal is a constant, then PHISCEV
  4755. // will be constant.
  4756. //
  4757. // If PHISCEV is a constant, then P1 degenerates into P2 or P3, so we don't
  4758. // add P1.
  4759. if (const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
  4760. SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
  4761. Signed ? SCEVWrapPredicate::IncrementNSSW
  4762. : SCEVWrapPredicate::IncrementNUSW;
  4763. const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags);
  4764. Predicates.push_back(AddRecPred);
  4765. }
  4766. // Create the Equal Predicates P2,P3:
  4767. // It is possible that the predicates P2 and/or P3 are computable at
  4768. // compile time due to StartVal and/or Accum being constants.
  4769. // If either one is, then we can check that now and escape if either P2
  4770. // or P3 is false.
  4771. // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy)
  4772. // for each of StartVal and Accum
  4773. auto getExtendedExpr = [&](const SCEV *Expr,
  4774. bool CreateSignExtend) -> const SCEV * {
  4775. assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
  4776. const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
  4777. const SCEV *ExtendedExpr =
  4778. CreateSignExtend ? getSignExtendExpr(TruncatedExpr, Expr->getType())
  4779. : getZeroExtendExpr(TruncatedExpr, Expr->getType());
  4780. return ExtendedExpr;
  4781. };
  4782. // Given:
  4783. // ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy
  4784. // = getExtendedExpr(Expr)
  4785. // Determine whether the predicate P: Expr == ExtendedExpr
  4786. // is known to be false at compile time
  4787. auto PredIsKnownFalse = [&](const SCEV *Expr,
  4788. const SCEV *ExtendedExpr) -> bool {
  4789. return Expr != ExtendedExpr &&
  4790. isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr);
  4791. };
  4792. const SCEV *StartExtended = getExtendedExpr(StartVal, Signed);
  4793. if (PredIsKnownFalse(StartVal, StartExtended)) {
  4794. LLVM_DEBUG(dbgs() << "P2 is compile-time false\n";);
  4795. return None;
  4796. }
  4797. // The Step is always Signed (because the overflow checks are either
  4798. // NSSW or NUSW)
  4799. const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true);
  4800. if (PredIsKnownFalse(Accum, AccumExtended)) {
  4801. LLVM_DEBUG(dbgs() << "P3 is compile-time false\n";);
  4802. return None;
  4803. }
  4804. auto AppendPredicate = [&](const SCEV *Expr,
  4805. const SCEV *ExtendedExpr) -> void {
  4806. if (Expr != ExtendedExpr &&
  4807. !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
  4808. const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
  4809. LLVM_DEBUG(dbgs() << "Added Predicate: " << *Pred);
  4810. Predicates.push_back(Pred);
  4811. }
  4812. };
  4813. AppendPredicate(StartVal, StartExtended);
  4814. AppendPredicate(Accum, AccumExtended);
  // *** Part3: Predicates are ready. Now go ahead and create the new addrec in
  // which the casts had been folded away. The caller can rewrite SymbolicPHI
  // into NewAR if it will also add the runtime overflow checks specified in
  // Predicates.
  auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap);
  std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite =
      std::make_pair(NewAR, Predicates);
  // Remember the result of the analysis for this SCEV at this location.
  PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite;
  return PredRewrite;
}
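// For illustration, the update chain this analysis targets looks like:
//   %iv      = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
//   %trunc   = trunc i64 %iv to i32
//   %ext     = sext i32 %trunc to i64
//   %iv.next = add i64 %ext, %step        ; %step loop-invariant
// Under the predicates P1-P3 collected above, the PHI may be rewritten as the
// cast-free addrec {%start,+,%step}, guarded by the returned predicates.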
  4826. Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
  4827. ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
  4828. auto *PN = cast<PHINode>(SymbolicPHI->getValue());
  4829. const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
  4830. if (!L)
  4831. return None;
  4832. // Check to see if we already analyzed this PHI.
  4833. auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L});
  4834. if (I != PredicatedSCEVRewrites.end()) {
  4835. std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
  4836. I->second;
  4837. // Analysis was done before and failed to create an AddRec:
  4838. if (Rewrite.first == SymbolicPHI)
  4839. return None;
    // Analysis was done before and succeeded in creating an AddRec under
    // a predicate:
  4842. assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
  4843. assert(!(Rewrite.second).empty() && "Expected to find Predicates");
  4844. return Rewrite;
  4845. }
  4846. Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
  4847. Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI);
  4848. // Record in the cache that the analysis failed
  4849. if (!Rewrite) {
  4850. SmallVector<const SCEVPredicate *, 3> Predicates;
  4851. PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates};
  4852. return None;
  4853. }
  4854. return Rewrite;
  4855. }
  4856. // FIXME: This utility is currently required because the Rewriter currently
  4857. // does not rewrite this expression:
  4858. // {0, +, (sext ix (trunc iy to ix) to iy)}
  4859. // into {0, +, %step},
  4860. // even when the following Equal predicate exists:
  4861. // "%step == (sext ix (trunc iy to ix) to iy)".
  4862. bool PredicatedScalarEvolution::areAddRecsEqualWithPreds(
  4863. const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const {
  4864. if (AR1 == AR2)
  4865. return true;
  4866. auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool {
  4867. if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) &&
  4868. !Preds.implies(SE.getEqualPredicate(Expr2, Expr1)))
  4869. return false;
  4870. return true;
  4871. };
  4872. if (!areExprsEqual(AR1->getStart(), AR2->getStart()) ||
  4873. !areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE)))
  4874. return false;
  4875. return true;
  4876. }
  4877. /// A helper function for createAddRecFromPHI to handle simple cases.
  4878. ///
  4879. /// This function tries to find an AddRec expression for the simplest (yet most
  4880. /// common) cases: PN = PHI(Start, OP(Self, LoopInvariant)).
  4881. /// If it fails, createAddRecFromPHI will use a more general, but slow,
  4882. /// technique for finding the AddRec expression.
  4883. const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
  4884. Value *BEValueV,
  4885. Value *StartValueV) {
  4886. const Loop *L = LI.getLoopFor(PN->getParent());
  4887. assert(L && L->getHeader() == PN->getParent());
  4888. assert(BEValueV && StartValueV);
  4889. auto BO = MatchBinaryOp(BEValueV, DT);
  4890. if (!BO)
  4891. return nullptr;
  4892. if (BO->Opcode != Instruction::Add)
  4893. return nullptr;
  4894. const SCEV *Accum = nullptr;
  4895. if (BO->LHS == PN && L->isLoopInvariant(BO->RHS))
  4896. Accum = getSCEV(BO->RHS);
  4897. else if (BO->RHS == PN && L->isLoopInvariant(BO->LHS))
  4898. Accum = getSCEV(BO->LHS);
  4899. if (!Accum)
  4900. return nullptr;
  4901. SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
  4902. if (BO->IsNUW)
  4903. Flags = setFlags(Flags, SCEV::FlagNUW);
  4904. if (BO->IsNSW)
  4905. Flags = setFlags(Flags, SCEV::FlagNSW);
  4906. const SCEV *StartVal = getSCEV(StartValueV);
  4907. const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
  4908. insertValueToMap(PN, PHISCEV);
  4909. // We can add Flags to the post-inc expression only if we
  4910. // know that it is *undefined behavior* for BEValueV to
  4911. // overflow.
  4912. if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) {
  4913. assert(isLoopInvariant(Accum, L) &&
  4914. "Accum is defined outside L, but is not invariant?");
  4915. if (isAddRecNeverPoison(BEInst, L))
  4916. (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
  4917. }
  4918. return PHISCEV;
  4919. }
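// For illustration, the simple pattern handled above is:
//   %iv      = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
//   %iv.next = add nsw i64 %iv, %stride   ; %stride loop-invariant
// which yields {%start,+,%stride}<nsw><%loop>, with the nsw flag taken from
// the add instruction.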
  4920. const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
  4921. const Loop *L = LI.getLoopFor(PN->getParent());
  4922. if (!L || L->getHeader() != PN->getParent())
  4923. return nullptr;
  4924. // The loop may have multiple entrances or multiple exits; we can analyze
  4925. // this phi as an addrec if it has a unique entry value and a unique
  4926. // backedge value.
  4927. Value *BEValueV = nullptr, *StartValueV = nullptr;
  4928. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
  4929. Value *V = PN->getIncomingValue(i);
  4930. if (L->contains(PN->getIncomingBlock(i))) {
  4931. if (!BEValueV) {
  4932. BEValueV = V;
  4933. } else if (BEValueV != V) {
  4934. BEValueV = nullptr;
  4935. break;
  4936. }
  4937. } else if (!StartValueV) {
  4938. StartValueV = V;
  4939. } else if (StartValueV != V) {
  4940. StartValueV = nullptr;
  4941. break;
  4942. }
  4943. }
  4944. if (!BEValueV || !StartValueV)
  4945. return nullptr;
  4946. assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
  4947. "PHI node already processed?");
  // First, try to find an AddRec expression without creating a fictitious
  // symbolic value for PN.
  4950. if (auto *S = createSimpleAffineAddRec(PN, BEValueV, StartValueV))
  4951. return S;
  4952. // Handle PHI node value symbolically.
  4953. const SCEV *SymbolicName = getUnknown(PN);
  4954. insertValueToMap(PN, SymbolicName);
  4955. // Using this symbolic name for the PHI, analyze the value coming around
  4956. // the back-edge.
  4957. const SCEV *BEValue = getSCEV(BEValueV);
  4958. // NOTE: If BEValue is loop invariant, we know that the PHI node just
  4959. // has a special value for the first iteration of the loop.
  4960. // If the value coming around the backedge is an add with the symbolic
  4961. // value we just inserted, then we found a simple induction variable!
  4962. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
  4963. // If there is a single occurrence of the symbolic value, replace it
  4964. // with a recurrence.
  4965. unsigned FoundIndex = Add->getNumOperands();
  4966. for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
  4967. if (Add->getOperand(i) == SymbolicName)
  4968. if (FoundIndex == e) {
  4969. FoundIndex = i;
  4970. break;
  4971. }
  4972. if (FoundIndex != Add->getNumOperands()) {
  4973. // Create an add with everything but the specified operand.
  4974. SmallVector<const SCEV *, 8> Ops;
  4975. for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
  4976. if (i != FoundIndex)
  4977. Ops.push_back(SCEVBackedgeConditionFolder::rewrite(Add->getOperand(i),
  4978. L, *this));
  4979. const SCEV *Accum = getAddExpr(Ops);
  4980. // This is not a valid addrec if the step amount is varying each
  4981. // loop iteration, but is not itself an addrec in this loop.
  4982. if (isLoopInvariant(Accum, L) ||
  4983. (isa<SCEVAddRecExpr>(Accum) &&
  4984. cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
  4985. SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
  4986. if (auto BO = MatchBinaryOp(BEValueV, DT)) {
  4987. if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
  4988. if (BO->IsNUW)
  4989. Flags = setFlags(Flags, SCEV::FlagNUW);
  4990. if (BO->IsNSW)
  4991. Flags = setFlags(Flags, SCEV::FlagNSW);
  4992. }
  4993. } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
  4994. // If the increment is an inbounds GEP, then we know the address
  4995. // space cannot be wrapped around. We cannot make any guarantee
  4996. // about signed or unsigned overflow because pointers are
  4997. // unsigned but we may have a negative index from the base
  4998. // pointer. We can guarantee that no unsigned wrap occurs if the
  4999. // indices form a positive value.
  5000. if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
  5001. Flags = setFlags(Flags, SCEV::FlagNW);
  5002. const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
  5003. if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
  5004. Flags = setFlags(Flags, SCEV::FlagNUW);
  5005. }
  5006. // We cannot transfer nuw and nsw flags from subtraction
  5007. // operations -- sub nuw X, Y is not the same as add nuw X, -Y
  5008. // for instance.
  5009. }
  5010. const SCEV *StartVal = getSCEV(StartValueV);
  5011. const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
  5012. // Okay, for the entire analysis of this edge we assumed the PHI
  5013. // to be symbolic. We now need to go back and purge all of the
  5014. // entries for the scalars that use the symbolic expression.
  5015. forgetMemoizedResults(SymbolicName);
  5016. insertValueToMap(PN, PHISCEV);
  5017. // We can add Flags to the post-inc expression only if we
  5018. // know that it is *undefined behavior* for BEValueV to
  5019. // overflow.
  5020. if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
  5021. if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
  5022. (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
  5023. return PHISCEV;
  5024. }
  5025. }
  5026. } else {
  5027. // Otherwise, this could be a loop like this:
  5028. // i = 0; for (j = 1; ..; ++j) { .... i = j; }
  5029. // In this case, j = {1,+,1} and BEValue is j.
    // Because the other in-value of i (0) fits the evolution of BEValue,
    // i really is an addrec evolution.
    //
    // We can generalize this by saying that i is the shifted value of BEValue
    // by one iteration:
  5035. // PHI(f(0), f({1,+,1})) --> f({0,+,1})
  5036. const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
  5037. const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false);
  5038. if (Shifted != getCouldNotCompute() &&
  5039. Start != getCouldNotCompute()) {
  5040. const SCEV *StartVal = getSCEV(StartValueV);
  5041. if (Start == StartVal) {
  5042. // Okay, for the entire analysis of this edge we assumed the PHI
  5043. // to be symbolic. We now need to go back and purge all of the
  5044. // entries for the scalars that use the symbolic expression.
  5045. forgetMemoizedResults(SymbolicName);
  5046. insertValueToMap(PN, Shifted);
  5047. return Shifted;
  5048. }
  5049. }
  5050. }
  // Remove the temporary PHI node SCEV that has been inserted while intending
  // to create an AddRecExpr for this PHI node. We cannot keep this temporary
  // as it will prevent later (possibly simpler) SCEV expressions from being
  // added to the ValueExprMap.
  5055. eraseValueFromMap(PN);
  5056. return nullptr;
  5057. }
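// For illustration of the general path above: if the backedge value is seen
// by SCEV as the n-ary add (%iv + %a + %c) with %a and %c loop-invariant,
// the symbolic %iv operand is stripped out, Accum becomes (%a + %c), and the
// PHI is rewritten as {%start,+,(%a + %c)}<%loop>.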
  5058. // Checks if the SCEV S is available at BB. S is considered available at BB
  5059. // if S can be materialized at BB without introducing a fault.
  5060. static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
  5061. BasicBlock *BB) {
  5062. struct CheckAvailable {
  5063. bool TraversalDone = false;
  5064. bool Available = true;
  5065. const Loop *L = nullptr; // The loop BB is in (can be nullptr)
  5066. BasicBlock *BB = nullptr;
  5067. DominatorTree &DT;
  5068. CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
  5069. : L(L), BB(BB), DT(DT) {}
  5070. bool setUnavailable() {
  5071. TraversalDone = true;
  5072. Available = false;
  5073. return false;
  5074. }
  5075. bool follow(const SCEV *S) {
  5076. switch (S->getSCEVType()) {
  5077. case scConstant:
  5078. case scPtrToInt:
  5079. case scTruncate:
  5080. case scZeroExtend:
  5081. case scSignExtend:
  5082. case scAddExpr:
  5083. case scMulExpr:
  5084. case scUMaxExpr:
  5085. case scSMaxExpr:
  5086. case scUMinExpr:
  5087. case scSMinExpr:
  5088. case scSequentialUMinExpr:
  5089. // These expressions are available if their operand(s) is/are.
  5090. return true;
  5091. case scAddRecExpr: {
  5092. // We allow add recurrences that are on the loop BB is in, or some
  5093. // outer loop. This guarantees availability because the value of the
  5094. // add recurrence at BB is simply the "current" value of the induction
  5095. // variable. We can relax this in the future; for instance an add
  5096. // recurrence on a sibling dominating loop is also available at BB.
  5097. const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
  5098. if (L && (ARLoop == L || ARLoop->contains(L)))
  5099. return true;
  5100. return setUnavailable();
  5101. }
  5102. case scUnknown: {
  5103. // For SCEVUnknown, we check for simple dominance.
  5104. const auto *SU = cast<SCEVUnknown>(S);
  5105. Value *V = SU->getValue();
  5106. if (isa<Argument>(V))
  5107. return false;
  5108. if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
  5109. return false;
  5110. return setUnavailable();
  5111. }
      case scUDivExpr:
      case scCouldNotCompute:
        // We do not try to be smart about these at all.
        return setUnavailable();
  5116. }
  5117. llvm_unreachable("Unknown SCEV kind!");
  5118. }
  5119. bool isDone() { return TraversalDone; }
  5120. };
  5121. CheckAvailable CA(L, BB, DT);
  5122. SCEVTraversal<CheckAvailable> ST(CA);
  5123. ST.visitAll(S);
  5124. return CA.Available;
  5125. }
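// E.g., a SCEVUnknown for an instruction defined inside the loop body does
// not dominate the header and is therefore reported unavailable, whereas an
// addrec on the loop itself (or on an enclosing loop) is always considered
// available.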
  5126. // Try to match a control flow sequence that branches out at BI and merges back
  5127. // at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful
  5128. // match.
  5129. static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
  5130. Value *&C, Value *&LHS, Value *&RHS) {
  5131. C = BI->getCondition();
  5132. BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
  5133. BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));
  5134. if (!LeftEdge.isSingleEdge())
  5135. return false;
  5136. assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");
  5137. Use &LeftUse = Merge->getOperandUse(0);
  5138. Use &RightUse = Merge->getOperandUse(1);
  5139. if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
  5140. LHS = LeftUse;
  5141. RHS = RightUse;
  5142. return true;
  5143. }
  5144. if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
  5145. LHS = RightUse;
  5146. RHS = LeftUse;
  5147. return true;
  5148. }
  5149. return false;
  5150. }
  5151. const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
  5152. auto IsReachable =
  5153. [&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); };
  5154. if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) {
  5155. const Loop *L = LI.getLoopFor(PN->getParent());
  5156. // We don't want to break LCSSA, even in a SCEV expression tree.
  5157. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
  5158. if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
  5159. return nullptr;
  5160. // Try to match
  5161. //
  5162. // br %cond, label %left, label %right
  5163. // left:
  5164. // br label %merge
  5165. // right:
  5166. // br label %merge
  5167. // merge:
  5168. // V = phi [ %x, %left ], [ %y, %right ]
  5169. //
  5170. // as "select %cond, %x, %y"
  5171. BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
  5172. assert(IDom && "At least the entry block should dominate PN");
  5173. auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
  5174. Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;
  5175. if (BI && BI->isConditional() &&
  5176. BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
  5177. IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
  5178. IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
  5179. return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
  5180. }
  5181. return nullptr;
  5182. }
  5183. const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
  5184. if (const SCEV *S = createAddRecFromPHI(PN))
  5185. return S;
  5186. if (const SCEV *S = createNodeFromSelectLikePHI(PN))
  5187. return S;
  5188. // If the PHI has a single incoming value, follow that value, unless the
  5189. // PHI's incoming blocks are in a different loop, in which case doing so
  5190. // risks breaking LCSSA form. Instcombine would normally zap these, but
  5191. // it doesn't have DominatorTree information, so it may miss cases.
  5192. if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
  5193. if (LI.replacementPreservesLCSSAForm(PN, V))
  5194. return getSCEV(V);
  5195. // If it's not a loop phi, we can't handle it yet.
  5196. return getUnknown(PN);
  5197. }
  5198. const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
  5199. Value *Cond,
  5200. Value *TrueVal,
  5201. Value *FalseVal) {
  5202. // Handle "constant" branch or select. This can occur for instance when a
  5203. // loop pass transforms an inner loop and moves on to process the outer loop.
  5204. if (auto *CI = dyn_cast<ConstantInt>(Cond))
  5205. return getSCEV(CI->isOne() ? TrueVal : FalseVal);
  5206. // Try to match some simple smax or umax patterns.
  5207. auto *ICI = dyn_cast<ICmpInst>(Cond);
  5208. if (!ICI)
  5209. return getUnknown(I);
  5210. Value *LHS = ICI->getOperand(0);
  5211. Value *RHS = ICI->getOperand(1);
  5212. switch (ICI->getPredicate()) {
  5213. case ICmpInst::ICMP_SLT:
  5214. case ICmpInst::ICMP_SLE:
  5215. case ICmpInst::ICMP_ULT:
  5216. case ICmpInst::ICMP_ULE:
  5217. std::swap(LHS, RHS);
  5218. LLVM_FALLTHROUGH;
  5219. case ICmpInst::ICMP_SGT:
  5220. case ICmpInst::ICMP_SGE:
  5221. case ICmpInst::ICMP_UGT:
  5222. case ICmpInst::ICMP_UGE:
  5223. // a > b ? a+x : b+x -> max(a, b)+x
  5224. // a > b ? b+x : a+x -> min(a, b)+x
  5225. if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
  5226. bool Signed = ICI->isSigned();
  5227. const SCEV *LA = getSCEV(TrueVal);
  5228. const SCEV *RA = getSCEV(FalseVal);
  5229. const SCEV *LS = getSCEV(LHS);
  5230. const SCEV *RS = getSCEV(RHS);
  5231. if (LA->getType()->isPointerTy()) {
  5232. // FIXME: Handle cases where LS/RS are pointers not equal to LA/RA.
  5233. // Need to make sure we can't produce weird expressions involving
  5234. // negated pointers.
  5235. if (LA == LS && RA == RS)
  5236. return Signed ? getSMaxExpr(LS, RS) : getUMaxExpr(LS, RS);
  5237. if (LA == RS && RA == LS)
  5238. return Signed ? getSMinExpr(LS, RS) : getUMinExpr(LS, RS);
  5239. }
  5240. auto CoerceOperand = [&](const SCEV *Op) -> const SCEV * {
  5241. if (Op->getType()->isPointerTy()) {
  5242. Op = getLosslessPtrToIntExpr(Op);
  5243. if (isa<SCEVCouldNotCompute>(Op))
  5244. return Op;
  5245. }
  5246. if (Signed)
  5247. Op = getNoopOrSignExtend(Op, I->getType());
  5248. else
  5249. Op = getNoopOrZeroExtend(Op, I->getType());
  5250. return Op;
  5251. };
  5252. LS = CoerceOperand(LS);
  5253. RS = CoerceOperand(RS);
  5254. if (isa<SCEVCouldNotCompute>(LS) || isa<SCEVCouldNotCompute>(RS))
  5255. break;
  5256. const SCEV *LDiff = getMinusSCEV(LA, LS);
  5257. const SCEV *RDiff = getMinusSCEV(RA, RS);
  5258. if (LDiff == RDiff)
  5259. return getAddExpr(Signed ? getSMaxExpr(LS, RS) : getUMaxExpr(LS, RS),
  5260. LDiff);
  5261. LDiff = getMinusSCEV(LA, RS);
  5262. RDiff = getMinusSCEV(RA, LS);
  5263. if (LDiff == RDiff)
  5264. return getAddExpr(Signed ? getSMinExpr(LS, RS) : getUMinExpr(LS, RS),
  5265. LDiff);
  5266. }
  5267. break;
  5268. case ICmpInst::ICMP_NE:
  5269. // n != 0 ? n+x : 1+x -> umax(n, 1)+x
  5270. if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
  5271. isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
  5272. const SCEV *One = getOne(I->getType());
  5273. const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
  5274. const SCEV *LA = getSCEV(TrueVal);
  5275. const SCEV *RA = getSCEV(FalseVal);
  5276. const SCEV *LDiff = getMinusSCEV(LA, LS);
  5277. const SCEV *RDiff = getMinusSCEV(RA, One);
  5278. if (LDiff == RDiff)
  5279. return getAddExpr(getUMaxExpr(One, LS), LDiff);
  5280. }
  5281. break;
  5282. case ICmpInst::ICMP_EQ:
  5283. // n == 0 ? 1+x : n+x -> umax(n, 1)+x
  5284. if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
  5285. isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
  5286. const SCEV *One = getOne(I->getType());
  5287. const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
  5288. const SCEV *LA = getSCEV(TrueVal);
  5289. const SCEV *RA = getSCEV(FalseVal);
  5290. const SCEV *LDiff = getMinusSCEV(LA, One);
  5291. const SCEV *RDiff = getMinusSCEV(RA, LS);
  5292. if (LDiff == RDiff)
  5293. return getAddExpr(getUMaxExpr(One, LS), LDiff);
  5294. }
  5295. break;
  5296. default:
  5297. break;
  5298. }
  5299. return getUnknown(I);
  5300. }
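// Worked example for the ICMP_NE case above: for
//   %sel = select (icmp ne i64 %n, 0), i64 %n.plus.1, i64 2
// with %n.plus.1 = add i64 %n, 1, we get LA = (%n + 1), LS = %n, LDiff = 1,
// RA = 2, RDiff = 2 - 1 = 1; the differences match, so the select is modeled
// as (umax(%n, 1) + 1).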
/// Expand GEP instructions into add and multiply operations. This allows them
/// to be analyzed by regular SCEV code.
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
  // Don't attempt to analyze GEPs over unsized objects.
  if (!GEP->getSourceElementType()->isSized())
    return getUnknown(GEP);

  SmallVector<const SCEV *, 4> IndexExprs;
  for (Value *Index : GEP->indices())
    IndexExprs.push_back(getSCEV(Index));
  return getGEPExpr(GEP, IndexExprs);
}
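// E.g., assuming i32 is four bytes in the target data layout,
//   %p = getelementptr inbounds i32, ptr %base, i64 %i
// is modeled roughly as (%base + (4 * %i)), which the usual add/mul folding
// in SCEV can then simplify further.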
  5312. uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
  5313. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
  5314. return C->getAPInt().countTrailingZeros();
  5315. if (const SCEVPtrToIntExpr *I = dyn_cast<SCEVPtrToIntExpr>(S))
  5316. return GetMinTrailingZeros(I->getOperand());
  5317. if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
  5318. return std::min(GetMinTrailingZeros(T->getOperand()),
  5319. (uint32_t)getTypeSizeInBits(T->getType()));
  5320. if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
  5321. uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
  5322. return OpRes == getTypeSizeInBits(E->getOperand()->getType())
  5323. ? getTypeSizeInBits(E->getType())
  5324. : OpRes;
  5325. }
  5326. if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
  5327. uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
  5328. return OpRes == getTypeSizeInBits(E->getOperand()->getType())
  5329. ? getTypeSizeInBits(E->getType())
  5330. : OpRes;
  5331. }
  5332. if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
  5333. // The result is the min of all operands results.
  5334. uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
  5335. for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
  5336. MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
  5337. return MinOpRes;
  5338. }
  5339. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
  5340. // The result is the sum of all operands results.
  5341. uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
  5342. uint32_t BitWidth = getTypeSizeInBits(M->getType());
  5343. for (unsigned i = 1, e = M->getNumOperands();
  5344. SumOpRes != BitWidth && i != e; ++i)
  5345. SumOpRes =
  5346. std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth);
  5347. return SumOpRes;
  5348. }
  5349. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
  5350. // The result is the min of all operands results.
  5351. uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
  5352. for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
  5353. MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
  5354. return MinOpRes;
  5355. }
  5356. if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
  5357. // The result is the min of all operands results.
  5358. uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
  5359. for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
  5360. MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
  5361. return MinOpRes;
  5362. }
  5363. if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
  5364. // The result is the min of all operands results.
  5365. uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
  5366. for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
  5367. MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
  5368. return MinOpRes;
  5369. }
  5370. if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
  5371. // For a SCEVUnknown, ask ValueTracking.
  5372. KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT);
  5373. return Known.countMinTrailingZeros();
  5374. }
  5375. // SCEVUDivExpr
  5376. return 0;
  5377. }
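// E.g., GetMinTrailingZeros((8 * %x)) is at least 3, because the constant 8
// alone contributes three trailing zero bits, and for an add such as
// ((4 * %x) + (8 * %y)) the result is the minimum over the operands, i.e. at
// least 2.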
uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
  auto I = MinTrailingZerosCache.find(S);
  if (I != MinTrailingZerosCache.end())
    return I->second;

  uint32_t Result = GetMinTrailingZerosImpl(S);
  auto InsertPair = MinTrailingZerosCache.insert({S, Result});
  assert(InsertPair.second && "Should insert a new key");
  return InsertPair.first->second;
}

/// Helper method to assign a range to V from metadata present in the IR.
static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    if (MDNode *MD = I->getMetadata(LLVMContext::MD_range))
      return getConstantRangeFromMetadata(*MD);

  return None;
}
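// E.g., a load annotated with !range !{i64 0, i64 100} yields the half-open
// ConstantRange [0, 100), which getRangeRef below intersects into its
// conservative result for the corresponding SCEVUnknown.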
void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec,
                                     SCEV::NoWrapFlags Flags) {
  if (AddRec->getNoWrapFlags(Flags) != Flags) {
    AddRec->setNoWrapFlags(Flags);
    UnsignedRanges.erase(AddRec);
    SignedRanges.erase(AddRec);
  }
}
  5402. ConstantRange ScalarEvolution::
  5403. getRangeForUnknownRecurrence(const SCEVUnknown *U) {
  5404. const DataLayout &DL = getDataLayout();
  5405. unsigned BitWidth = getTypeSizeInBits(U->getType());
  5406. const ConstantRange FullSet(BitWidth, /*isFullSet=*/true);
  5407. // Match a simple recurrence of the form: <start, ShiftOp, Step>, and then
  5408. // use information about the trip count to improve our available range. Note
  5409. // that the trip count independent cases are already handled by known bits.
  5410. // WARNING: The definition of recurrence used here is subtly different than
  5411. // the one used by AddRec (and thus most of this file). Step is allowed to
  5412. // be arbitrarily loop varying here, where AddRec allows only loop invariant
  5413. // and other addrecs in the same loop (for non-affine addrecs). The code
  5414. // below intentionally handles the case where step is not loop invariant.
  5415. auto *P = dyn_cast<PHINode>(U->getValue());
  5416. if (!P)
  5417. return FullSet;
  // Make sure that no Phi input comes from an unreachable block. Otherwise,
  // even the values that are not available in these blocks may come from them,
  // and this leads to a false-positive recurrence test.
  5421. for (auto *Pred : predecessors(P->getParent()))
  5422. if (!DT.isReachableFromEntry(Pred))
  5423. return FullSet;
  5424. BinaryOperator *BO;
  5425. Value *Start, *Step;
  5426. if (!matchSimpleRecurrence(P, BO, Start, Step))
  5427. return FullSet;
  5428. // If we found a recurrence in reachable code, we must be in a loop. Note
  5429. // that BO might be in some subloop of L, and that's completely okay.
  5430. auto *L = LI.getLoopFor(P->getParent());
  5431. assert(L && L->getHeader() == P->getParent());
  5432. if (!L->contains(BO->getParent()))
  5433. // NOTE: This bailout should be an assert instead. However, asserting
  5434. // the condition here exposes a case where LoopFusion is querying SCEV
  5435. // with malformed loop information during the midst of the transform.
  5436. // There doesn't appear to be an obvious fix, so for the moment bailout
  5437. // until the caller issue can be fixed. PR49566 tracks the bug.
  5438. return FullSet;
  5439. // TODO: Extend to other opcodes such as mul, and div
  5440. switch (BO->getOpcode()) {
  5441. default:
  5442. return FullSet;
  5443. case Instruction::AShr:
  5444. case Instruction::LShr:
  5445. case Instruction::Shl:
  5446. break;
  5447. };
  5448. if (BO->getOperand(0) != P)
  5449. // TODO: Handle the power function forms some day.
  5450. return FullSet;
  5451. unsigned TC = getSmallConstantMaxTripCount(L);
  5452. if (!TC || TC >= BitWidth)
  5453. return FullSet;
  5454. auto KnownStart = computeKnownBits(Start, DL, 0, &AC, nullptr, &DT);
  5455. auto KnownStep = computeKnownBits(Step, DL, 0, &AC, nullptr, &DT);
  5456. assert(KnownStart.getBitWidth() == BitWidth &&
  5457. KnownStep.getBitWidth() == BitWidth);
  5458. // Compute total shift amount, being careful of overflow and bitwidths.
  5459. auto MaxShiftAmt = KnownStep.getMaxValue();
  5460. APInt TCAP(BitWidth, TC-1);
  5461. bool Overflow = false;
  5462. auto TotalShift = MaxShiftAmt.umul_ov(TCAP, Overflow);
  5463. if (Overflow)
  5464. return FullSet;
  5465. switch (BO->getOpcode()) {
  5466. default:
  5467. llvm_unreachable("filtered out above");
  5468. case Instruction::AShr: {
  5469. // For each ashr, three cases:
  5470. // shift = 0 => unchanged value
  5471. // saturation => 0 or -1
  5472. // other => a value closer to zero (of the same sign)
  5473. // Thus, the end value is closer to zero than the start.
  5474. auto KnownEnd = KnownBits::ashr(KnownStart,
  5475. KnownBits::makeConstant(TotalShift));
  5476. if (KnownStart.isNonNegative())
  5477. // Analogous to lshr (simply not yet canonicalized)
  5478. return ConstantRange::getNonEmpty(KnownEnd.getMinValue(),
  5479. KnownStart.getMaxValue() + 1);
  5480. if (KnownStart.isNegative())
  5481. // End >=u Start && End <=s Start
  5482. return ConstantRange::getNonEmpty(KnownStart.getMinValue(),
  5483. KnownEnd.getMaxValue() + 1);
  5484. break;
  5485. }
  5486. case Instruction::LShr: {
  5487. // For each lshr, three cases:
  5488. // shift = 0 => unchanged value
  5489. // saturation => 0
  5490. // other => a smaller positive number
  5491. // Thus, the low end of the unsigned range is the last value produced.
  5492. auto KnownEnd = KnownBits::lshr(KnownStart,
  5493. KnownBits::makeConstant(TotalShift));
  5494. return ConstantRange::getNonEmpty(KnownEnd.getMinValue(),
  5495. KnownStart.getMaxValue() + 1);
  5496. }
  5497. case Instruction::Shl: {
  5498. // Iff no bits are shifted out, value increases on every shift.
  5499. auto KnownEnd = KnownBits::shl(KnownStart,
  5500. KnownBits::makeConstant(TotalShift));
  5501. if (TotalShift.ult(KnownStart.countMinLeadingZeros()))
  5502. return ConstantRange(KnownStart.getMinValue(),
  5503. KnownEnd.getMaxValue() + 1);
  5504. break;
  5505. }
  5506. };
  5507. return FullSet;
  5508. }
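// Worked example for the lshr case above: for the i8 recurrence
//   %p   = phi i8 [ 64, %preheader ], [ %shr, %latch ]
//   %shr = lshr i8 %p, 1
// with a constant maximum trip count of 4, TotalShift is 3 and the computed
// range is [64 >> 3, 64 + 1) = [8, 65).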
  5509. /// Determine the range for a particular SCEV. If SignHint is
  5510. /// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
  5511. /// with a "cleaner" unsigned (resp. signed) representation.
  5512. const ConstantRange &
  5513. ScalarEvolution::getRangeRef(const SCEV *S,
  5514. ScalarEvolution::RangeSignHint SignHint) {
  5515. DenseMap<const SCEV *, ConstantRange> &Cache =
  5516. SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
  5517. : SignedRanges;
  5518. ConstantRange::PreferredRangeType RangeType =
  5519. SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED
  5520. ? ConstantRange::Unsigned : ConstantRange::Signed;
  5521. // See if we've computed this range already.
  5522. DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
  5523. if (I != Cache.end())
  5524. return I->second;
  5525. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
  5526. return setRange(C, SignHint, ConstantRange(C->getAPInt()));
  5527. unsigned BitWidth = getTypeSizeInBits(S->getType());
  5528. ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
  5529. using OBO = OverflowingBinaryOperator;
  5530. // If the value has known zeros, the maximum value will have those known zeros
  5531. // as well.
  5532. uint32_t TZ = GetMinTrailingZeros(S);
  5533. if (TZ != 0) {
  5534. if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
  5535. ConservativeResult =
  5536. ConstantRange(APInt::getMinValue(BitWidth),
  5537. APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
  5538. else
  5539. ConservativeResult = ConstantRange(
  5540. APInt::getSignedMinValue(BitWidth),
  5541. APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
  5542. }
  5543. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
  5544. ConstantRange X = getRangeRef(Add->getOperand(0), SignHint);
  5545. unsigned WrapType = OBO::AnyWrap;
  5546. if (Add->hasNoSignedWrap())
  5547. WrapType |= OBO::NoSignedWrap;
  5548. if (Add->hasNoUnsignedWrap())
  5549. WrapType |= OBO::NoUnsignedWrap;
  5550. for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
  5551. X = X.addWithNoWrap(getRangeRef(Add->getOperand(i), SignHint),
  5552. WrapType, RangeType);
  5553. return setRange(Add, SignHint,
  5554. ConservativeResult.intersectWith(X, RangeType));
  5555. }
  5556. if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
  5557. ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint);
  5558. for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
  5559. X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint));
  5560. return setRange(Mul, SignHint,
  5561. ConservativeResult.intersectWith(X, RangeType));
  5562. }
  5563. if (isa<SCEVMinMaxExpr>(S) || isa<SCEVSequentialMinMaxExpr>(S)) {
  5564. Intrinsic::ID ID;
  5565. switch (S->getSCEVType()) {
  5566. case scUMaxExpr:
  5567. ID = Intrinsic::umax;
  5568. break;
  5569. case scSMaxExpr:
  5570. ID = Intrinsic::smax;
  5571. break;
  5572. case scUMinExpr:
  5573. case scSequentialUMinExpr:
  5574. ID = Intrinsic::umin;
  5575. break;
  5576. case scSMinExpr:
  5577. ID = Intrinsic::smin;
  5578. break;
  5579. default:
  5580. llvm_unreachable("Unknown SCEVMinMaxExpr/SCEVSequentialMinMaxExpr.");
  5581. }
  5582. const auto *NAry = cast<SCEVNAryExpr>(S);
  5583. ConstantRange X = getRangeRef(NAry->getOperand(0), SignHint);
  5584. for (unsigned i = 1, e = NAry->getNumOperands(); i != e; ++i)
  5585. X = X.intrinsic(ID, {X, getRangeRef(NAry->getOperand(i), SignHint)});
  5586. return setRange(S, SignHint,
  5587. ConservativeResult.intersectWith(X, RangeType));
  5588. }
  5589. if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
  5590. ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
  5591. ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
  5592. return setRange(UDiv, SignHint,
  5593. ConservativeResult.intersectWith(X.udiv(Y), RangeType));
  5594. }
  5595. if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
  5596. ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint);
  5597. return setRange(ZExt, SignHint,
  5598. ConservativeResult.intersectWith(X.zeroExtend(BitWidth),
  5599. RangeType));
  5600. }
  5601. if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
  5602. ConstantRange X = getRangeRef(SExt->getOperand(), SignHint);
  5603. return setRange(SExt, SignHint,
  5604. ConservativeResult.intersectWith(X.signExtend(BitWidth),
  5605. RangeType));
  5606. }
  5607. if (const SCEVPtrToIntExpr *PtrToInt = dyn_cast<SCEVPtrToIntExpr>(S)) {
  5608. ConstantRange X = getRangeRef(PtrToInt->getOperand(), SignHint);
  5609. return setRange(PtrToInt, SignHint, X);
  5610. }
  5611. if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
  5612. ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint);
  5613. return setRange(Trunc, SignHint,
  5614. ConservativeResult.intersectWith(X.truncate(BitWidth),
  5615. RangeType));
  5616. }
  5617. if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
  5618. // If there's no unsigned wrap, the value will never be less than its
  5619. // initial value.
  5620. if (AddRec->hasNoUnsignedWrap()) {
  5621. APInt UnsignedMinValue = getUnsignedRangeMin(AddRec->getStart());
  5622. if (!UnsignedMinValue.isZero())
  5623. ConservativeResult = ConservativeResult.intersectWith(
  5624. ConstantRange(UnsignedMinValue, APInt(BitWidth, 0)), RangeType);
  5625. }
  5626. // If there's no signed wrap, and all the operands except initial value have
  5627. // the same sign or zero, the value won't ever be:
  5628. // 1: smaller than initial value if operands are non negative,
  5629. // 2: bigger than initial value if operands are non positive.
  5630. // For both cases, value can not cross signed min/max boundary.
  5631. if (AddRec->hasNoSignedWrap()) {
  5632. bool AllNonNeg = true;
  5633. bool AllNonPos = true;
  5634. for (unsigned i = 1, e = AddRec->getNumOperands(); i != e; ++i) {
  5635. if (!isKnownNonNegative(AddRec->getOperand(i)))
  5636. AllNonNeg = false;
  5637. if (!isKnownNonPositive(AddRec->getOperand(i)))
  5638. AllNonPos = false;
  5639. }
  5640. if (AllNonNeg)
  5641. ConservativeResult = ConservativeResult.intersectWith(
  5642. ConstantRange::getNonEmpty(getSignedRangeMin(AddRec->getStart()),
  5643. APInt::getSignedMinValue(BitWidth)),
  5644. RangeType);
  5645. else if (AllNonPos)
  5646. ConservativeResult = ConservativeResult.intersectWith(
  5647. ConstantRange::getNonEmpty(
  5648. APInt::getSignedMinValue(BitWidth),
  5649. getSignedRangeMax(AddRec->getStart()) + 1),
  5650. RangeType);
  5651. }
  5652. // TODO: non-affine addrec
  5653. if (AddRec->isAffine()) {
  5654. const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(AddRec->getLoop());
  5655. if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
  5656. getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
  5657. auto RangeFromAffine = getRangeForAffineAR(
  5658. AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
  5659. BitWidth);
  5660. ConservativeResult =
  5661. ConservativeResult.intersectWith(RangeFromAffine, RangeType);
  5662. auto RangeFromFactoring = getRangeViaFactoring(
  5663. AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
  5664. BitWidth);
  5665. ConservativeResult =
  5666. ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
  5667. }
  5668. // Now try symbolic BE count and more powerful methods.
  5669. if (UseExpensiveRangeSharpening) {
  5670. const SCEV *SymbolicMaxBECount =
  5671. getSymbolicMaxBackedgeTakenCount(AddRec->getLoop());
  5672. if (!isa<SCEVCouldNotCompute>(SymbolicMaxBECount) &&
  5673. getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
  5674. AddRec->hasNoSelfWrap()) {
  5675. auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
  5676. AddRec, SymbolicMaxBECount, BitWidth, SignHint);
  5677. ConservativeResult =
  5678. ConservativeResult.intersectWith(RangeFromAffineNew, RangeType);
  5679. }
  5680. }
  5681. }
  5682. return setRange(AddRec, SignHint, std::move(ConservativeResult));
  5683. }
  5684. if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
  5685. // Check if the IR explicitly contains !range metadata.
  5686. Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
  5687. if (MDRange.hasValue())
  5688. ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(),
  5689. RangeType);
  5690. // Use facts about recurrences in the underlying IR. Note that add
  5691. // recurrences are AddRecExprs and thus don't hit this path. This
  5692. // primarily handles shift recurrences.
  5693. auto CR = getRangeForUnknownRecurrence(U);
  5694. ConservativeResult = ConservativeResult.intersectWith(CR);
  5695. // See if ValueTracking can give us a useful range.
  5696. const DataLayout &DL = getDataLayout();
  5697. KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
  5698. if (Known.getBitWidth() != BitWidth)
  5699. Known = Known.zextOrTrunc(BitWidth);
  5700. // ValueTracking may be able to compute a tighter result for the number of
  5701. // sign bits than for the value of those sign bits.
  5702. unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
  5703. if (U->getType()->isPointerTy()) {
  5704. // If the pointer size is larger than the index size type, this can cause
  5705. // NS to be larger than BitWidth. So compensate for this.
  5706. unsigned ptrSize = DL.getPointerTypeSizeInBits(U->getType());
  5707. int ptrIdxDiff = ptrSize - BitWidth;
  5708. if (ptrIdxDiff > 0 && ptrSize > BitWidth && NS > (unsigned)ptrIdxDiff)
  5709. NS -= ptrIdxDiff;
  5710. }
  5711. if (NS > 1) {
  5712. // If we know any of the sign bits, we know all of the sign bits.
  5713. if (!Known.Zero.getHiBits(NS).isZero())
  5714. Known.Zero.setHighBits(NS);
  5715. if (!Known.One.getHiBits(NS).isZero())
  5716. Known.One.setHighBits(NS);
  5717. }
  5718. if (Known.getMinValue() != Known.getMaxValue() + 1)
  5719. ConservativeResult = ConservativeResult.intersectWith(
  5720. ConstantRange(Known.getMinValue(), Known.getMaxValue() + 1),
  5721. RangeType);
  5722. if (NS > 1)
  5723. ConservativeResult = ConservativeResult.intersectWith(
  5724. ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
  5725. APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1),
  5726. RangeType);
  5727. // A range of Phi is a subset of union of all ranges of its input.
  5728. if (const PHINode *Phi = dyn_cast<PHINode>(U->getValue())) {
  5729. // Make sure that we do not run over cycled Phis.
  5730. if (PendingPhiRanges.insert(Phi).second) {
  5731. ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false);
  5732. for (auto &Op : Phi->operands()) {
  5733. auto OpRange = getRangeRef(getSCEV(Op), SignHint);
  5734. RangeFromOps = RangeFromOps.unionWith(OpRange);
  5735. // No point to continue if we already have a full set.
  5736. if (RangeFromOps.isFullSet())
  5737. break;
  5738. }
  5739. ConservativeResult =
  5740. ConservativeResult.intersectWith(RangeFromOps, RangeType);
  5741. bool Erased = PendingPhiRanges.erase(Phi);
  5742. assert(Erased && "Failed to erase Phi properly?");
  5743. (void) Erased;
  5744. }
  5745. }
  5746. return setRange(U, SignHint, std::move(ConservativeResult));
  5747. }
  5748. return setRange(S, SignHint, std::move(ConservativeResult));
  5749. }
  5750. // Given a StartRange, Step and MaxBECount for an expression compute a range of
  5751. // values that the expression can take. Initially, the expression has a value
  5752. // from StartRange and then is changed by Step up to MaxBECount times. Signed
  5753. // argument defines if we treat Step as signed or unsigned.
  5754. static ConstantRange getRangeForAffineARHelper(APInt Step,
  5755. const ConstantRange &StartRange,
  5756. const APInt &MaxBECount,
  5757. unsigned BitWidth, bool Signed) {
  5758. // If either Step or MaxBECount is 0, then the expression won't change, and we
  5759. // just need to return the initial range.
  5760. if (Step == 0 || MaxBECount == 0)
  5761. return StartRange;
  5762. // If we don't know anything about the initial value (i.e. StartRange is
  5763. // FullRange), then we don't know anything about the final range either.
  5764. // Return FullRange.
  5765. if (StartRange.isFullSet())
  5766. return ConstantRange::getFull(BitWidth);
  5767. // If Step is signed and negative, then we use its absolute value, but we also
  5768. // note that we're moving in the opposite direction.
  5769. bool Descending = Signed && Step.isNegative();
  5770. if (Signed)
    // This is correct even for INT_SMIN. Let's look at i8 to illustrate this:
    // abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128.
    // These equations hold true due to the well-defined wrap-around behavior
    // of APInt.
    Step = Step.abs();
  5776. // Check if Offset is more than full span of BitWidth. If it is, the
  5777. // expression is guaranteed to overflow.
  5778. if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount))
  5779. return ConstantRange::getFull(BitWidth);
  5780. // Offset is by how much the expression can change. Checks above guarantee no
  5781. // overflow here.
  5782. APInt Offset = Step * MaxBECount;
  5783. // Minimum value of the final range will match the minimal value of StartRange
  5784. // if the expression is increasing and will be decreased by Offset otherwise.
  5785. // Maximum value of the final range will match the maximal value of StartRange
  5786. // if the expression is decreasing and will be increased by Offset otherwise.
  5787. APInt StartLower = StartRange.getLower();
  5788. APInt StartUpper = StartRange.getUpper() - 1;
  5789. APInt MovedBoundary = Descending ? (StartLower - std::move(Offset))
  5790. : (StartUpper + std::move(Offset));
  5791. // It's possible that the new minimum/maximum value will fall into the initial
  5792. // range (due to wrap around). This means that the expression can take any
  5793. // value in this bitwidth, and we have to return full range.
  5794. if (StartRange.contains(MovedBoundary))
  5795. return ConstantRange::getFull(BitWidth);
  5796. APInt NewLower =
  5797. Descending ? std::move(MovedBoundary) : std::move(StartLower);
  5798. APInt NewUpper =
  5799. Descending ? std::move(StartUpper) : std::move(MovedBoundary);
  5800. NewUpper += 1;
  5801. // No overflow detected, return [StartLower, StartUpper + Offset + 1) range.
  5802. return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper));
  5803. }
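// Worked example: with BitWidth = 8, Step = 2, StartRange = [0, 10),
// MaxBECount = 5 and Signed = false, Offset is 10, the moved boundary is
// 9 + 10 = 19, which does not fall back into [0, 10), so the helper returns
// [0, 20).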
  5804. ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
  5805. const SCEV *Step,
  5806. const SCEV *MaxBECount,
  5807. unsigned BitWidth) {
  5808. assert(!isa<SCEVCouldNotCompute>(MaxBECount) &&
  5809. getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
  5810. "Precondition!");
  5811. MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
  5812. APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount);
  5813. // First, consider step signed.
  5814. ConstantRange StartSRange = getSignedRange(Start);
  5815. ConstantRange StepSRange = getSignedRange(Step);
  5816. // If Step can be both positive and negative, we need to find ranges for the
  5817. // maximum absolute step values in both directions and union them.
  5818. ConstantRange SR =
  5819. getRangeForAffineARHelper(StepSRange.getSignedMin(), StartSRange,
  5820. MaxBECountValue, BitWidth, /* Signed = */ true);
  5821. SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(),
  5822. StartSRange, MaxBECountValue,
  5823. BitWidth, /* Signed = */ true));
  5824. // Next, consider step unsigned.
  5825. ConstantRange UR = getRangeForAffineARHelper(
  5826. getUnsignedRangeMax(Step), getUnsignedRange(Start),
  5827. MaxBECountValue, BitWidth, /* Signed = */ false);
  5828. // Finally, intersect signed and unsigned ranges.
  5829. return SR.intersectWith(UR, ConstantRange::Smallest);
  5830. }
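// E.g., for an i8 addrec {0,+,-1} with at most 10 backedge iterations, the
// signed computation yields [-10, 1) while the unsigned one (which sees the
// step as 255) gives the full set; intersecting the two keeps [-10, 1).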
  5831. ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
  5832. const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth,
  5833. ScalarEvolution::RangeSignHint SignHint) {
  assert(AddRec->isAffine() && "Non-affine AddRecs are not supported!\n");
  5835. assert(AddRec->hasNoSelfWrap() &&
  5836. "This only works for non-self-wrapping AddRecs!");
  5837. const bool IsSigned = SignHint == HINT_RANGE_SIGNED;
  5838. const SCEV *Step = AddRec->getStepRecurrence(*this);
  5839. // Only deal with constant step to save compile time.
  5840. if (!isa<SCEVConstant>(Step))
  5841. return ConstantRange::getFull(BitWidth);
  5842. // Let's make sure that we can prove that we do not self-wrap during
  5843. // MaxBECount iterations. We need this because MaxBECount is a maximum
  5844. // iteration count estimate, and we might infer nw from some exit for which we
  5845. // do not know max exit count (or any other side reasoning).
  5846. // TODO: Turn into assert at some point.
  5847. if (getTypeSizeInBits(MaxBECount->getType()) >
  5848. getTypeSizeInBits(AddRec->getType()))
  5849. return ConstantRange::getFull(BitWidth);
  5850. MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType());
  5851. const SCEV *RangeWidth = getMinusOne(AddRec->getType());
  5852. const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step));
  5853. const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs);
  5854. if (!isKnownPredicateViaConstantRanges(ICmpInst::ICMP_ULE, MaxBECount,
  5855. MaxItersWithoutWrap))
  5856. return ConstantRange::getFull(BitWidth);
  5857. ICmpInst::Predicate LEPred =
  5858. IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
  5859. ICmpInst::Predicate GEPred =
  5860. IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
  5861. const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
  5862. // We know that there is no self-wrap. Let's take Start and End values and
  5863. // look at all intermediate values V1, V2, ..., Vn that IndVar takes during
  5864. // the iteration. They either lie inside the range [Min(Start, End),
  5865. // Max(Start, End)] or outside it:
  5866. //
  5867. // Case 1: RangeMin ... Start V1 ... VN End ... RangeMax;
  5868. // Case 2: RangeMin Vk ... V1 Start ... End Vn ... Vk + 1 RangeMax;
  5869. //
  5870. // No self wrap flag guarantees that the intermediate values cannot be BOTH
  5871. // outside and inside the range [Min(Start, End), Max(Start, End)]. Using that
  5872. // knowledge, let's try to prove that we are dealing with Case 1. It is so if
  5873. // Start <= End and step is positive, or Start >= End and step is negative.
  5874. const SCEV *Start = AddRec->getStart();
  5875. ConstantRange StartRange = getRangeRef(Start, SignHint);
  5876. ConstantRange EndRange = getRangeRef(End, SignHint);
  5877. ConstantRange RangeBetween = StartRange.unionWith(EndRange);
  5878. // If they already cover full iteration space, we will know nothing useful
  5879. // even if we prove what we want to prove.
  5880. if (RangeBetween.isFullSet())
  5881. return RangeBetween;
  5882. // Only deal with ranges that do not wrap (i.e. RangeMin < RangeMax).
  5883. bool IsWrappedSet = IsSigned ? RangeBetween.isSignWrappedSet()
  5884. : RangeBetween.isWrappedSet();
  5885. if (IsWrappedSet)
  5886. return ConstantRange::getFull(BitWidth);
  5887. if (isKnownPositive(Step) &&
  5888. isKnownPredicateViaConstantRanges(LEPred, Start, End))
  5889. return RangeBetween;
  5890. else if (isKnownNegative(Step) &&
  5891. isKnownPredicateViaConstantRanges(GEPred, Start, End))
  5892. return RangeBetween;
  5893. return ConstantRange::getFull(BitWidth);
  5894. }
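// For illustration, take {%a,+,1}<nw> whose symbolic maximum backedge count
// is %n: End is (%a + %n), and if the union of the ranges of %a and End does
// not wrap and %a <= End can be proven from constant ranges, that union is
// returned as the range of the addrec.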
  5895. ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
  5896. const SCEV *Step,
  5897. const SCEV *MaxBECount,
  5898. unsigned BitWidth) {
  5899. // RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q})
  5900. // == RangeOf({A,+,P}) union RangeOf({B,+,Q})
  5901. struct SelectPattern {
  5902. Value *Condition = nullptr;
  5903. APInt TrueValue;
  5904. APInt FalseValue;
  5905. explicit SelectPattern(ScalarEvolution &SE, unsigned BitWidth,
  5906. const SCEV *S) {
  5907. Optional<unsigned> CastOp;
  5908. APInt Offset(BitWidth, 0);
  5909. assert(SE.getTypeSizeInBits(S->getType()) == BitWidth &&
  5910. "Should be!");
  5911. // Peel off a constant offset:
  5912. if (auto *SA = dyn_cast<SCEVAddExpr>(S)) {
  5913. // In the future we could consider being smarter here and handle
  5914. // {Start+Step,+,Step} too.
  5915. if (SA->getNumOperands() != 2 || !isa<SCEVConstant>(SA->getOperand(0)))
  5916. return;
  5917. Offset = cast<SCEVConstant>(SA->getOperand(0))->getAPInt();
  5918. S = SA->getOperand(1);
  5919. }
  5920. // Peel off a cast operation
  5921. if (auto *SCast = dyn_cast<SCEVIntegralCastExpr>(S)) {
  5922. CastOp = SCast->getSCEVType();
  5923. S = SCast->getOperand();
  5924. }
  5925. using namespace llvm::PatternMatch;
  5926. auto *SU = dyn_cast<SCEVUnknown>(S);
  5927. const APInt *TrueVal, *FalseVal;
  5928. if (!SU ||
  5929. !match(SU->getValue(), m_Select(m_Value(Condition), m_APInt(TrueVal),
  5930. m_APInt(FalseVal)))) {
  5931. Condition = nullptr;
  5932. return;
  5933. }
  5934. TrueValue = *TrueVal;
  5935. FalseValue = *FalseVal;
  5936. // Re-apply the cast we peeled off earlier
  5937. if (CastOp.hasValue())
  5938. switch (*CastOp) {
  5939. default:
  5940. llvm_unreachable("Unknown SCEV cast type!");
  5941. case scTruncate:
  5942. TrueValue = TrueValue.trunc(BitWidth);
  5943. FalseValue = FalseValue.trunc(BitWidth);
  5944. break;
  5945. case scZeroExtend:
  5946. TrueValue = TrueValue.zext(BitWidth);
  5947. FalseValue = FalseValue.zext(BitWidth);
  5948. break;
  5949. case scSignExtend:
  5950. TrueValue = TrueValue.sext(BitWidth);
  5951. FalseValue = FalseValue.sext(BitWidth);
  5952. break;
  5953. }
  5954. // Re-apply the constant offset we peeled off earlier
  5955. TrueValue += Offset;
  5956. FalseValue += Offset;
  5957. }
  5958. bool isRecognized() { return Condition != nullptr; }
  5959. };
  5960. SelectPattern StartPattern(*this, BitWidth, Start);
  5961. if (!StartPattern.isRecognized())
  5962. return ConstantRange::getFull(BitWidth);
  5963. SelectPattern StepPattern(*this, BitWidth, Step);
  5964. if (!StepPattern.isRecognized())
  5965. return ConstantRange::getFull(BitWidth);
  5966. if (StartPattern.Condition != StepPattern.Condition) {
  5967. // We don't handle this case today; but we could, by considering four
  5968. // possibilities below instead of two. I'm not sure if there are cases where
  5969. // that will help over what getRange already does, though.
  5970. return ConstantRange::getFull(BitWidth);
  5971. }
  5972. // NB! Calling ScalarEvolution::getConstant is fine, but we should not try to
  5973. // construct arbitrary general SCEV expressions here. This function is called
  5974. // from deep in the call stack, and calling getSCEV (on a sext instruction,
  5975. // say) can end up caching a suboptimal value.
  5976. // FIXME: without the explicit `this` receiver below, MSVC errors out with
  5977. // C2352 and C2512 (otherwise it isn't needed).
  5978. const SCEV *TrueStart = this->getConstant(StartPattern.TrueValue);
  5979. const SCEV *TrueStep = this->getConstant(StepPattern.TrueValue);
  5980. const SCEV *FalseStart = this->getConstant(StartPattern.FalseValue);
  5981. const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue);
  5982. ConstantRange TrueRange =
  5983. this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount, BitWidth);
  5984. ConstantRange FalseRange =
  5985. this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount, BitWidth);
  5986. return TrueRange.unionWith(FalseRange);
  5987. }
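// E.g., an addrec {(%c ? 0 : 10),+,(%c ? 1 : 2)} (both selects on the same
// condition) is ranged as the union of the ranges of {0,+,1} and {10,+,2},
// each computed via getRangeForAffineAR above.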
  5988. SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
  5989. if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
  5990. const BinaryOperator *BinOp = cast<BinaryOperator>(V);
  5991. // Return early if there are no flags to propagate to the SCEV.
  5992. SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
  5993. if (BinOp->hasNoUnsignedWrap())
  5994. Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
  5995. if (BinOp->hasNoSignedWrap())
  5996. Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
  5997. if (Flags == SCEV::FlagAnyWrap)
  5998. return SCEV::FlagAnyWrap;
  5999. return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap;
  6000. }
  6001. const Instruction *
  6002. ScalarEvolution::getNonTrivialDefiningScopeBound(const SCEV *S) {
  6003. if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S))
  6004. return &*AddRec->getLoop()->getHeader()->begin();
  6005. if (auto *U = dyn_cast<SCEVUnknown>(S))
  6006. if (auto *I = dyn_cast<Instruction>(U->getValue()))
  6007. return I;
  6008. return nullptr;
  6009. }
  6010. /// Fills \p Ops with unique operands of \p S, if it has operands. If not,
  6011. /// \p Ops remains unmodified.
  6012. static void collectUniqueOps(const SCEV *S,
  6013. SmallVectorImpl<const SCEV *> &Ops) {
  6014. SmallPtrSet<const SCEV *, 4> Unique;
  6015. auto InsertUnique = [&](const SCEV *S) {
  6016. if (Unique.insert(S).second)
  6017. Ops.push_back(S);
  6018. };
  6019. if (auto *S2 = dyn_cast<SCEVCastExpr>(S))
  6020. for (auto *Op : S2->operands())
  6021. InsertUnique(Op);
  6022. else if (auto *S2 = dyn_cast<SCEVNAryExpr>(S))
  6023. for (auto *Op : S2->operands())
  6024. InsertUnique(Op);
  6025. else if (auto *S2 = dyn_cast<SCEVUDivExpr>(S))
  6026. for (auto *Op : S2->operands())
  6027. InsertUnique(Op);
  6028. }
  6029. const Instruction *
  6030. ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops,
  6031. bool &Precise) {
  6032. Precise = true;
  6033. // Do a bounded search of the def relation of the requested SCEVs.
  6034. SmallSet<const SCEV *, 16> Visited;
  6035. SmallVector<const SCEV *> Worklist;
  6036. auto pushOp = [&](const SCEV *S) {
  6037. if (!Visited.insert(S).second)
  6038. return;
  6039. // Threshold of 30 here is arbitrary.
  6040. if (Visited.size() > 30) {
  6041. Precise = false;
  6042. return;
  6043. }
  6044. Worklist.push_back(S);
  6045. };
  6046. for (auto *S : Ops)
  6047. pushOp(S);
  6048. const Instruction *Bound = nullptr;
  6049. while (!Worklist.empty()) {
  6050. auto *S = Worklist.pop_back_val();
  6051. if (auto *DefI = getNonTrivialDefiningScopeBound(S)) {
  6052. if (!Bound || DT.dominates(Bound, DefI))
  6053. Bound = DefI;
  6054. } else {
  6055. SmallVector<const SCEV *, 4> Ops;
  6056. collectUniqueOps(S, Ops);
  6057. for (auto *Op : Ops)
  6058. pushOp(Op);
  6059. }
  6060. }
  6061. return Bound ? Bound : &*F.getEntryBlock().begin();
  6062. }
  6063. const Instruction *
  6064. ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops) {
  6065. bool Discard;
  6066. return getDefiningScopeBound(Ops, Discard);
  6067. }
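// Return true if, whenever control reaches instruction \p A, it is guaranteed
// to also reach instruction \p B: either both are in the same block and
// execution transfers from A to B, or A sits in the preheader of the loop
// whose header contains B and both the preheader suffix after A and the
// header prefix before B are guaranteed to transfer execution.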
  6068. bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A,
  6069. const Instruction *B) {
  6070. if (A->getParent() == B->getParent() &&
  6071. isGuaranteedToTransferExecutionToSuccessor(A->getIterator(),
  6072. B->getIterator()))
  6073. return true;
  6074. auto *BLoop = LI.getLoopFor(B->getParent());
  6075. if (BLoop && BLoop->getHeader() == B->getParent() &&
  6076. BLoop->getLoopPreheader() == A->getParent() &&
  6077. isGuaranteedToTransferExecutionToSuccessor(A->getIterator(),
  6078. A->getParent()->end()) &&
  6079. isGuaranteedToTransferExecutionToSuccessor(B->getParent()->begin(),
  6080. B->getIterator()))
  6081. return true;
  6082. return false;
  6083. }
  6084. bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
  6085. // Only proceed if we can prove that I does not yield poison.
  6086. if (!programUndefinedIfPoison(I))
  6087. return false;
  6088. // At this point we know that if I is executed, then it does not wrap
  6089. // according to at least one of NSW or NUW. If I is not executed, then we do
  6090. // not know if the calculation that I represents would wrap. Multiple
  6091. // instructions can map to the same SCEV. If we apply NSW or NUW from I to
  6092. // the SCEV, we must guarantee no wrapping for that SCEV also when it is
  6093. // derived from other instructions that map to the same SCEV. We cannot make
6094. // that guarantee for cases where I is not executed. So we need to find an
  6095. // upper bound on the defining scope for the SCEV, and prove that I is
  6096. // executed every time we enter that scope. When the bounding scope is a
  6097. // loop (the common case), this is equivalent to proving I executes on every
  6098. // iteration of that loop.
  6099. SmallVector<const SCEV *> SCEVOps;
  6100. for (const Use &Op : I->operands()) {
  6101. // I could be an extractvalue from a call to an overflow intrinsic.
  6102. // TODO: We can do better here in some cases.
  6103. if (isSCEVable(Op->getType()))
  6104. SCEVOps.push_back(getSCEV(Op));
  6105. }
  6106. auto *DefI = getDefiningScopeBound(SCEVOps);
  6107. return isGuaranteedToTransferExecutionTo(DefI, I);
  6108. }
  6109. bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
6110. // If we know that \c I can never be poison, period, then that's enough.
  6111. if (isSCEVExprNeverPoison(I))
  6112. return true;
  6113. // For an add recurrence specifically, we assume that infinite loops without
  6114. // side effects are undefined behavior, and then reason as follows:
  6115. //
  6116. // If the add recurrence is poison in any iteration, it is poison on all
  6117. // future iterations (since incrementing poison yields poison). If the result
  6118. // of the add recurrence is fed into the loop latch condition and the loop
  6119. // does not contain any throws or exiting blocks other than the latch, we now
  6120. // have the ability to "choose" whether the backedge is taken or not (by
  6121. // choosing a sufficiently evil value for the poison feeding into the branch)
  6122. // for every iteration including and after the one in which \p I first became
  6123. // poison. There are two possibilities (let's call the iteration in which \p
  6124. // I first became poison as K):
  6125. //
  6126. // 1. In the set of iterations including and after K, the loop body executes
6127. // no side effects. In this case, executing the backedge an infinite number
  6128. // of times will yield undefined behavior.
  6129. //
  6130. // 2. In the set of iterations including and after K, the loop body executes
  6131. // at least one side effect. In this case, that specific instance of side
  6132. // effect is control dependent on poison, which also yields undefined
  6133. // behavior.
  6134. auto *ExitingBB = L->getExitingBlock();
  6135. auto *LatchBB = L->getLoopLatch();
  6136. if (!ExitingBB || !LatchBB || ExitingBB != LatchBB)
  6137. return false;
  6138. SmallPtrSet<const Instruction *, 16> Pushed;
  6139. SmallVector<const Instruction *, 8> PoisonStack;
  6140. // We start by assuming \c I, the post-inc add recurrence, is poison. Only
  6141. // things that are known to be poison under that assumption go on the
  6142. // PoisonStack.
  6143. Pushed.insert(I);
  6144. PoisonStack.push_back(I);
  6145. bool LatchControlDependentOnPoison = false;
  6146. while (!PoisonStack.empty() && !LatchControlDependentOnPoison) {
  6147. const Instruction *Poison = PoisonStack.pop_back_val();
  6148. for (auto *PoisonUser : Poison->users()) {
  6149. if (propagatesPoison(cast<Operator>(PoisonUser))) {
  6150. if (Pushed.insert(cast<Instruction>(PoisonUser)).second)
  6151. PoisonStack.push_back(cast<Instruction>(PoisonUser));
  6152. } else if (auto *BI = dyn_cast<BranchInst>(PoisonUser)) {
  6153. assert(BI->isConditional() && "Only possibility!");
  6154. if (BI->getParent() == LatchBB) {
  6155. LatchControlDependentOnPoison = true;
  6156. break;
  6157. }
  6158. }
  6159. }
  6160. }
  6161. return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
  6162. }
  6163. ScalarEvolution::LoopProperties
  6164. ScalarEvolution::getLoopProperties(const Loop *L) {
  6165. using LoopProperties = ScalarEvolution::LoopProperties;
  6166. auto Itr = LoopPropertiesCache.find(L);
  6167. if (Itr == LoopPropertiesCache.end()) {
  6168. auto HasSideEffects = [](Instruction *I) {
  6169. if (auto *SI = dyn_cast<StoreInst>(I))
  6170. return !SI->isSimple();
  6171. return I->mayThrow() || I->mayWriteToMemory();
  6172. };
  6173. LoopProperties LP = {/* HasNoAbnormalExits */ true,
  6174. /*HasNoSideEffects*/ true};
  6175. for (auto *BB : L->getBlocks())
  6176. for (auto &I : *BB) {
  6177. if (!isGuaranteedToTransferExecutionToSuccessor(&I))
  6178. LP.HasNoAbnormalExits = false;
  6179. if (HasSideEffects(&I))
  6180. LP.HasNoSideEffects = false;
  6181. if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects)
  6182. break; // We're already as pessimistic as we can get.
  6183. }
  6184. auto InsertPair = LoopPropertiesCache.insert({L, LP});
  6185. assert(InsertPair.second && "We just checked!");
  6186. Itr = InsertPair.first;
  6187. }
  6188. return Itr->second;
  6189. }
  6190. bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) {
  6191. // A mustprogress loop without side effects must be finite.
  6192. // TODO: The check used here is very conservative. It's only *specific*
  6193. // side effects which are well defined in infinite loops.
  6194. return isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L));
  6195. }
  6196. const SCEV *ScalarEvolution::createSCEV(Value *V) {
  6197. if (!isSCEVable(V->getType()))
  6198. return getUnknown(V);
  6199. if (Instruction *I = dyn_cast<Instruction>(V)) {
  6200. // Don't attempt to analyze instructions in blocks that aren't
  6201. // reachable. Such instructions don't matter, and they aren't required
  6202. // to obey basic rules for definitions dominating uses which this
  6203. // analysis depends on.
  6204. if (!DT.isReachableFromEntry(I->getParent()))
  6205. return getUnknown(UndefValue::get(V->getType()));
  6206. } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
  6207. return getConstant(CI);
  6208. else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
  6209. return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee());
  6210. else if (!isa<ConstantExpr>(V))
  6211. return getUnknown(V);
  6212. Operator *U = cast<Operator>(V);
  6213. if (auto BO = MatchBinaryOp(U, DT)) {
  6214. switch (BO->Opcode) {
  6215. case Instruction::Add: {
  6216. // The simple thing to do would be to just call getSCEV on both operands
  6217. // and call getAddExpr with the result. However if we're looking at a
  6218. // bunch of things all added together, this can be quite inefficient,
  6219. // because it leads to N-1 getAddExpr calls for N ultimate operands.
  6220. // Instead, gather up all the operands and make a single getAddExpr call.
  6221. // LLVM IR canonical form means we need only traverse the left operands.
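// For example (illustrative), ((a + b) + c) + d is flattened into a single
// getAddExpr call over {d, c, b, a} instead of three nested getAddExpr calls,
// by repeatedly peeling the right operand and descending into the left
// operand via MatchBinaryOp below.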
  6222. SmallVector<const SCEV *, 4> AddOps;
  6223. do {
  6224. if (BO->Op) {
  6225. if (auto *OpSCEV = getExistingSCEV(BO->Op)) {
  6226. AddOps.push_back(OpSCEV);
  6227. break;
  6228. }
  6229. // If a NUW or NSW flag can be applied to the SCEV for this
  6230. // addition, then compute the SCEV for this addition by itself
  6231. // with a separate call to getAddExpr. We need to do that
  6232. // instead of pushing the operands of the addition onto AddOps,
  6233. // since the flags are only known to apply to this particular
  6234. // addition - they may not apply to other additions that can be
  6235. // formed with operands from AddOps.
  6236. const SCEV *RHS = getSCEV(BO->RHS);
  6237. SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op);
  6238. if (Flags != SCEV::FlagAnyWrap) {
  6239. const SCEV *LHS = getSCEV(BO->LHS);
  6240. if (BO->Opcode == Instruction::Sub)
  6241. AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
  6242. else
  6243. AddOps.push_back(getAddExpr(LHS, RHS, Flags));
  6244. break;
  6245. }
  6246. }
  6247. if (BO->Opcode == Instruction::Sub)
  6248. AddOps.push_back(getNegativeSCEV(getSCEV(BO->RHS)));
  6249. else
  6250. AddOps.push_back(getSCEV(BO->RHS));
  6251. auto NewBO = MatchBinaryOp(BO->LHS, DT);
  6252. if (!NewBO || (NewBO->Opcode != Instruction::Add &&
  6253. NewBO->Opcode != Instruction::Sub)) {
  6254. AddOps.push_back(getSCEV(BO->LHS));
  6255. break;
  6256. }
  6257. BO = NewBO;
  6258. } while (true);
  6259. return getAddExpr(AddOps);
  6260. }
  6261. case Instruction::Mul: {
  6262. SmallVector<const SCEV *, 4> MulOps;
  6263. do {
  6264. if (BO->Op) {
  6265. if (auto *OpSCEV = getExistingSCEV(BO->Op)) {
  6266. MulOps.push_back(OpSCEV);
  6267. break;
  6268. }
  6269. SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op);
  6270. if (Flags != SCEV::FlagAnyWrap) {
  6271. MulOps.push_back(
  6272. getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags));
  6273. break;
  6274. }
  6275. }
  6276. MulOps.push_back(getSCEV(BO->RHS));
  6277. auto NewBO = MatchBinaryOp(BO->LHS, DT);
  6278. if (!NewBO || NewBO->Opcode != Instruction::Mul) {
  6279. MulOps.push_back(getSCEV(BO->LHS));
  6280. break;
  6281. }
  6282. BO = NewBO;
  6283. } while (true);
  6284. return getMulExpr(MulOps);
  6285. }
  6286. case Instruction::UDiv:
  6287. return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
  6288. case Instruction::URem:
  6289. return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
  6290. case Instruction::Sub: {
  6291. SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
  6292. if (BO->Op)
  6293. Flags = getNoWrapFlagsFromUB(BO->Op);
  6294. return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags);
  6295. }
  6296. case Instruction::And:
  6297. // For an expression like x&255 that merely masks off the high bits,
  6298. // use zext(trunc(x)) as the SCEV expression.
  6299. if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
  6300. if (CI->isZero())
  6301. return getSCEV(BO->RHS);
  6302. if (CI->isMinusOne())
  6303. return getSCEV(BO->LHS);
  6304. const APInt &A = CI->getValue();
  6305. // Instcombine's ShrinkDemandedConstant may strip bits out of
  6306. // constants, obscuring what would otherwise be a low-bits mask.
  6307. // Use computeKnownBits to compute what ShrinkDemandedConstant
  6308. // knew about to reconstruct a low-bits mask value.
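// Illustrative example: for "%x & 0xF0" on i32, LZ = 24 and TZ = 4, so
// EffectiveMask below is 0xF0 and the expression ends up modeled as
// (zext (trunc (%x /u 16) to i4) to i32) * 16.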
  6309. unsigned LZ = A.countLeadingZeros();
  6310. unsigned TZ = A.countTrailingZeros();
  6311. unsigned BitWidth = A.getBitWidth();
  6312. KnownBits Known(BitWidth);
  6313. computeKnownBits(BO->LHS, Known, getDataLayout(),
  6314. 0, &AC, nullptr, &DT);
  6315. APInt EffectiveMask =
  6316. APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
  6317. if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) {
  6318. const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ));
  6319. const SCEV *LHS = getSCEV(BO->LHS);
  6320. const SCEV *ShiftedLHS = nullptr;
  6321. if (auto *LHSMul = dyn_cast<SCEVMulExpr>(LHS)) {
  6322. if (auto *OpC = dyn_cast<SCEVConstant>(LHSMul->getOperand(0))) {
  6323. // For an expression like (x * 8) & 8, simplify the multiply.
  6324. unsigned MulZeros = OpC->getAPInt().countTrailingZeros();
  6325. unsigned GCD = std::min(MulZeros, TZ);
  6326. APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD);
  6327. SmallVector<const SCEV*, 4> MulOps;
  6328. MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD)));
  6329. MulOps.append(LHSMul->op_begin() + 1, LHSMul->op_end());
  6330. auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags());
  6331. ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt));
  6332. }
  6333. }
  6334. if (!ShiftedLHS)
  6335. ShiftedLHS = getUDivExpr(LHS, MulCount);
  6336. return getMulExpr(
  6337. getZeroExtendExpr(
  6338. getTruncateExpr(ShiftedLHS,
  6339. IntegerType::get(getContext(), BitWidth - LZ - TZ)),
  6340. BO->LHS->getType()),
  6341. MulCount);
  6342. }
  6343. }
  6344. break;
  6345. case Instruction::Or:
  6346. // If the RHS of the Or is a constant, we may have something like:
  6347. // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
  6348. // optimizations will transparently handle this case.
  6349. //
  6350. // In order for this transformation to be safe, the LHS must be of the
  6351. // form X*(2^n) and the Or constant must be less than 2^n.
  6352. if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
  6353. const SCEV *LHS = getSCEV(BO->LHS);
  6354. const APInt &CIVal = CI->getValue();
  6355. if (GetMinTrailingZeros(LHS) >=
  6356. (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
  6357. // Build a plain add SCEV.
  6358. return getAddExpr(LHS, getSCEV(CI),
  6359. (SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW));
  6360. }
  6361. }
  6362. break;
  6363. case Instruction::Xor:
  6364. if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
  6365. // If the RHS of xor is -1, then this is a not operation.
  6366. if (CI->isMinusOne())
  6367. return getNotSCEV(getSCEV(BO->LHS));
  6368. // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
  6369. // This is a variant of the check for xor with -1, and it handles
  6370. // the case where instcombine has trimmed non-demanded bits out
  6371. // of an xor with -1.
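// Illustrative example, assuming the And below has already been modeled as a
// zero-extend of a truncate: for "(%x & 255) ^ 255" with %x of type i32, the
// LHS SCEV is (zext (trunc %x to i8) to i32), so the xor is modeled as
// (zext (not (trunc %x to i8)) to i32).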
  6372. if (auto *LBO = dyn_cast<BinaryOperator>(BO->LHS))
  6373. if (ConstantInt *LCI = dyn_cast<ConstantInt>(LBO->getOperand(1)))
  6374. if (LBO->getOpcode() == Instruction::And &&
  6375. LCI->getValue() == CI->getValue())
  6376. if (const SCEVZeroExtendExpr *Z =
  6377. dyn_cast<SCEVZeroExtendExpr>(getSCEV(BO->LHS))) {
  6378. Type *UTy = BO->LHS->getType();
  6379. const SCEV *Z0 = Z->getOperand();
  6380. Type *Z0Ty = Z0->getType();
  6381. unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
  6382. // If C is a low-bits mask, the zero extend is serving to
  6383. // mask off the high bits. Complement the operand and
  6384. // re-apply the zext.
  6385. if (CI->getValue().isMask(Z0TySize))
  6386. return getZeroExtendExpr(getNotSCEV(Z0), UTy);
  6387. // If C is a single bit, it may be in the sign-bit position
  6388. // before the zero-extend. In this case, represent the xor
  6389. // using an add, which is equivalent, and re-apply the zext.
  6390. APInt Trunc = CI->getValue().trunc(Z0TySize);
  6391. if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
  6392. Trunc.isSignMask())
  6393. return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
  6394. UTy);
  6395. }
  6396. }
  6397. break;
  6398. case Instruction::Shl:
  6399. // Turn shift left of a constant amount into a multiply.
  6400. if (ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) {
  6401. uint32_t BitWidth = cast<IntegerType>(SA->getType())->getBitWidth();
  6402. // If the shift count is not less than the bitwidth, the result of
  6403. // the shift is undefined. Don't try to analyze it, because the
  6404. // resolution chosen here may differ from the resolution chosen in
  6405. // other parts of the compiler.
  6406. if (SA->getValue().uge(BitWidth))
  6407. break;
  6408. // We can safely preserve the nuw flag in all cases. It's also safe to
  6409. // turn a nuw nsw shl into a nuw nsw mul. However, nsw in isolation
  6410. // requires special handling. It can be preserved as long as we're not
  6411. // left shifting by bitwidth - 1.
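// Illustrative note: for an i8 shift by 7 the multiplier would be 128, which
// is not representable as a positive signed i8 constant, so an nsw-only shl
// by bitwidth - 1 cannot in general be rewritten as an nsw multiply.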
  6412. auto Flags = SCEV::FlagAnyWrap;
  6413. if (BO->Op) {
  6414. auto MulFlags = getNoWrapFlagsFromUB(BO->Op);
  6415. if ((MulFlags & SCEV::FlagNSW) &&
  6416. ((MulFlags & SCEV::FlagNUW) || SA->getValue().ult(BitWidth - 1)))
  6417. Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNSW);
  6418. if (MulFlags & SCEV::FlagNUW)
  6419. Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNUW);
  6420. }
  6421. Constant *X = ConstantInt::get(
  6422. getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
  6423. return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags);
  6424. }
  6425. break;
  6426. case Instruction::AShr: {
  6427. // AShr X, C, where C is a constant.
  6428. ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS);
  6429. if (!CI)
  6430. break;
  6431. Type *OuterTy = BO->LHS->getType();
  6432. uint64_t BitWidth = getTypeSizeInBits(OuterTy);
  6433. // If the shift count is not less than the bitwidth, the result of
  6434. // the shift is undefined. Don't try to analyze it, because the
  6435. // resolution chosen here may differ from the resolution chosen in
  6436. // other parts of the compiler.
  6437. if (CI->getValue().uge(BitWidth))
  6438. break;
  6439. if (CI->isZero())
  6440. return getSCEV(BO->LHS); // shift by zero --> noop
  6441. uint64_t AShrAmt = CI->getZExtValue();
  6442. Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt);
  6443. Operator *L = dyn_cast<Operator>(BO->LHS);
  6444. if (L && L->getOpcode() == Instruction::Shl) {
  6445. // X = Shl A, n
  6446. // Y = AShr X, m
  6447. // Both n and m are constant.
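// Illustrative example: for i32, (ashr (shl %a, 24), 24) is the classic
// sign-extend-in-register idiom and is modeled below as
// sext(trunc(%a) to i8) back to i32.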
  6448. const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0));
  6449. if (L->getOperand(1) == BO->RHS)
  6450. // For a two-shift sext-inreg, i.e. n = m,
  6451. // use sext(trunc(x)) as the SCEV expression.
  6452. return getSignExtendExpr(
  6453. getTruncateExpr(ShlOp0SCEV, TruncTy), OuterTy);
  6454. ConstantInt *ShlAmtCI = dyn_cast<ConstantInt>(L->getOperand(1));
  6455. if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)) {
  6456. uint64_t ShlAmt = ShlAmtCI->getZExtValue();
  6457. if (ShlAmt > AShrAmt) {
  6458. // When n > m, use sext(mul(trunc(x), 2^(n-m)))) as the SCEV
  6459. // expression. We already checked that ShlAmt < BitWidth, so
  6460. // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as
6461. // ShlAmt - AShrAmt < BitWidth - AShrAmt, the width of TruncTy.
  6462. APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt,
  6463. ShlAmt - AShrAmt);
  6464. return getSignExtendExpr(
  6465. getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy),
  6466. getConstant(Mul)), OuterTy);
  6467. }
  6468. }
  6469. }
  6470. break;
  6471. }
  6472. }
  6473. }
  6474. switch (U->getOpcode()) {
  6475. case Instruction::Trunc:
  6476. return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
  6477. case Instruction::ZExt:
  6478. return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
  6479. case Instruction::SExt:
  6480. if (auto BO = MatchBinaryOp(U->getOperand(0), DT)) {
  6481. // The NSW flag of a subtract does not always survive the conversion to
  6482. // A + (-1)*B. By pushing sign extension onto its operands we are much
  6483. // more likely to preserve NSW and allow later AddRec optimisations.
  6484. //
  6485. // NOTE: This is effectively duplicating this logic from getSignExtend:
  6486. // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
  6487. // but by that point the NSW information has potentially been lost.
  6488. if (BO->Opcode == Instruction::Sub && BO->IsNSW) {
  6489. Type *Ty = U->getType();
  6490. auto *V1 = getSignExtendExpr(getSCEV(BO->LHS), Ty);
  6491. auto *V2 = getSignExtendExpr(getSCEV(BO->RHS), Ty);
  6492. return getMinusSCEV(V1, V2, SCEV::FlagNSW);
  6493. }
  6494. }
  6495. return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
  6496. case Instruction::BitCast:
  6497. // BitCasts are no-op casts so we just eliminate the cast.
  6498. if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
  6499. return getSCEV(U->getOperand(0));
  6500. break;
  6501. case Instruction::PtrToInt: {
6502. // A pointer-to-integer cast is straightforward, so do model it.
  6503. const SCEV *Op = getSCEV(U->getOperand(0));
  6504. Type *DstIntTy = U->getType();
  6505. // But only if effective SCEV (integer) type is wide enough to represent
  6506. // all possible pointer values.
  6507. const SCEV *IntOp = getPtrToIntExpr(Op, DstIntTy);
  6508. if (isa<SCEVCouldNotCompute>(IntOp))
  6509. return getUnknown(V);
  6510. return IntOp;
  6511. }
  6512. case Instruction::IntToPtr:
  6513. // Just don't deal with inttoptr casts.
  6514. return getUnknown(V);
  6515. case Instruction::SDiv:
  6516. // If both operands are non-negative, this is just an udiv.
  6517. if (isKnownNonNegative(getSCEV(U->getOperand(0))) &&
  6518. isKnownNonNegative(getSCEV(U->getOperand(1))))
  6519. return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)));
  6520. break;
  6521. case Instruction::SRem:
  6522. // If both operands are non-negative, this is just an urem.
  6523. if (isKnownNonNegative(getSCEV(U->getOperand(0))) &&
  6524. isKnownNonNegative(getSCEV(U->getOperand(1))))
  6525. return getURemExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)));
  6526. break;
  6527. case Instruction::GetElementPtr:
  6528. return createNodeForGEP(cast<GEPOperator>(U));
  6529. case Instruction::PHI:
  6530. return createNodeForPHI(cast<PHINode>(U));
  6531. case Instruction::Select:
6532. // U can also be a select constant expr, which we let fall through. Since
  6533. // createNodeForSelect only works for a condition that is an `ICmpInst`, and
  6534. // constant expressions cannot have instructions as operands, we'd have
6535. // returned getUnknown for a select constant expression anyway.
  6536. if (isa<Instruction>(U))
  6537. return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0),
  6538. U->getOperand(1), U->getOperand(2));
  6539. break;
  6540. case Instruction::Call:
  6541. case Instruction::Invoke:
  6542. if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand())
  6543. return getSCEV(RV);
  6544. if (auto *II = dyn_cast<IntrinsicInst>(U)) {
  6545. switch (II->getIntrinsicID()) {
  6546. case Intrinsic::abs:
  6547. return getAbsExpr(
  6548. getSCEV(II->getArgOperand(0)),
  6549. /*IsNSW=*/cast<ConstantInt>(II->getArgOperand(1))->isOne());
  6550. case Intrinsic::umax:
  6551. return getUMaxExpr(getSCEV(II->getArgOperand(0)),
  6552. getSCEV(II->getArgOperand(1)));
  6553. case Intrinsic::umin:
  6554. return getUMinExpr(getSCEV(II->getArgOperand(0)),
  6555. getSCEV(II->getArgOperand(1)));
  6556. case Intrinsic::smax:
  6557. return getSMaxExpr(getSCEV(II->getArgOperand(0)),
  6558. getSCEV(II->getArgOperand(1)));
  6559. case Intrinsic::smin:
  6560. return getSMinExpr(getSCEV(II->getArgOperand(0)),
  6561. getSCEV(II->getArgOperand(1)));
  6562. case Intrinsic::usub_sat: {
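// usub.sat(X, Y) == X - umin(X, Y); clamping Y to X first means the
// subtraction below can never wrap, so it is marked NUW.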
  6563. const SCEV *X = getSCEV(II->getArgOperand(0));
  6564. const SCEV *Y = getSCEV(II->getArgOperand(1));
  6565. const SCEV *ClampedY = getUMinExpr(X, Y);
  6566. return getMinusSCEV(X, ClampedY, SCEV::FlagNUW);
  6567. }
  6568. case Intrinsic::uadd_sat: {
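// uadd.sat(X, Y) == umin(X, ~Y) + Y; clamping X to (UINT_MAX - Y) first
// means the addition below can never wrap, so it is marked NUW.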
  6569. const SCEV *X = getSCEV(II->getArgOperand(0));
  6570. const SCEV *Y = getSCEV(II->getArgOperand(1));
  6571. const SCEV *ClampedX = getUMinExpr(X, getNotSCEV(Y));
  6572. return getAddExpr(ClampedX, Y, SCEV::FlagNUW);
  6573. }
  6574. case Intrinsic::start_loop_iterations:
  6575. // A start_loop_iterations is just equivalent to the first operand for
  6576. // SCEV purposes.
  6577. return getSCEV(II->getArgOperand(0));
  6578. default:
  6579. break;
  6580. }
  6581. }
  6582. break;
  6583. }
  6584. return getUnknown(V);
  6585. }
  6586. //===----------------------------------------------------------------------===//
  6587. // Iteration Count Computation Code
  6588. //
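// The trip count is one more than the backedge-taken (exit) count. When
// \p Extend is set, the addition is performed in a type one bit wider so
// that an all-ones exit count does not wrap the trip count to zero.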
  6589. const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount,
  6590. bool Extend) {
  6591. if (isa<SCEVCouldNotCompute>(ExitCount))
  6592. return getCouldNotCompute();
  6593. auto *ExitCountType = ExitCount->getType();
  6594. assert(ExitCountType->isIntegerTy());
  6595. if (!Extend)
  6596. return getAddExpr(ExitCount, getOne(ExitCountType));
  6597. auto *WiderType = Type::getIntNTy(ExitCountType->getContext(),
  6598. 1 + ExitCountType->getScalarSizeInBits());
  6599. return getAddExpr(getNoopOrZeroExtend(ExitCount, WiderType),
  6600. getOne(WiderType));
  6601. }
  6602. static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
  6603. if (!ExitCount)
  6604. return 0;
  6605. ConstantInt *ExitConst = ExitCount->getValue();
  6606. // Guard against huge trip counts.
  6607. if (ExitConst->getValue().getActiveBits() > 32)
  6608. return 0;
  6609. // In case of integer overflow, this returns 0, which is correct.
  6610. return ((unsigned)ExitConst->getZExtValue()) + 1;
  6611. }
  6612. unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
  6613. auto *ExitCount = dyn_cast<SCEVConstant>(getBackedgeTakenCount(L, Exact));
  6614. return getConstantTripCount(ExitCount);
  6615. }
  6616. unsigned
  6617. ScalarEvolution::getSmallConstantTripCount(const Loop *L,
  6618. const BasicBlock *ExitingBlock) {
  6619. assert(ExitingBlock && "Must pass a non-null exiting block!");
  6620. assert(L->isLoopExiting(ExitingBlock) &&
  6621. "Exiting block must actually branch out of the loop!");
  6622. const SCEVConstant *ExitCount =
  6623. dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
  6624. return getConstantTripCount(ExitCount);
  6625. }
  6626. unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
  6627. const auto *MaxExitCount =
  6628. dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
  6629. return getConstantTripCount(MaxExitCount);
  6630. }
  6631. const SCEV *ScalarEvolution::getConstantMaxTripCountFromArray(const Loop *L) {
6632. // We cannot infer a trip count from an array access in an irregular loop.
6633. // FIXME: It is hard to infer the loop bound from an array accessed in a nested loop.
  6634. if (!L->isLoopSimplifyForm() || !L->isInnermost())
  6635. return getCouldNotCompute();
6636. // FIXME: To keep the common case simple, we only analyze loops that have a
6637. // single exiting block, and that block must be the latch. This makes it
6638. // easier to guarantee that a memory access inside the loop is executed on
6639. // every iteration.
  6640. const BasicBlock *LoopLatch = L->getLoopLatch();
6641. assert(LoopLatch && "See definition of a loop in simplified form.");
  6642. if (L->getExitingBlock() != LoopLatch)
  6643. return getCouldNotCompute();
  6644. const DataLayout &DL = getDataLayout();
  6645. SmallVector<const SCEV *> InferCountColl;
  6646. for (auto *BB : L->getBlocks()) {
6647. // At this point we know the loop has a single exiting block and is in
6648. // simplified form. Only infer from memory operations in blocks that must be
6649. // executed on every iteration. First, make sure that the maximum execution
6650. // count of MemAccessBB within the loop bounds the latch's maximum execution
6651. // count. If MemAccessBB does not dominate the latch, skip it.
  6652. // Entry
  6653. // │
  6654. // ┌─────▼─────┐
  6655. // │Loop Header◄─────┐
  6656. // └──┬──────┬─┘ │
  6657. // │ │ │
  6658. // ┌────────▼──┐ ┌─▼─────┐ │
  6659. // │MemAccessBB│ │OtherBB│ │
  6660. // └────────┬──┘ └─┬─────┘ │
  6661. // │ │ │
  6662. // ┌─▼──────▼─┐ │
  6663. // │Loop Latch├─────┘
  6664. // └────┬─────┘
  6665. // ▼
  6666. // Exit
  6667. if (!DT.dominates(BB, LoopLatch))
  6668. continue;
  6669. for (Instruction &Inst : *BB) {
  6670. // Find Memory Operation Instruction.
  6671. auto *GEP = getLoadStorePointerOperand(&Inst);
  6672. if (!GEP)
  6673. continue;
  6674. auto *ElemSize = dyn_cast<SCEVConstant>(getElementSize(&Inst));
6675. // Do not infer from a scalar type, e.g. "ElemSize = sizeof()".
  6676. if (!ElemSize)
  6677. continue;
6678. // Use an existing polynomial recurrence on the trip count.
  6679. auto *AddRec = dyn_cast<SCEVAddRecExpr>(getSCEV(GEP));
  6680. if (!AddRec)
  6681. continue;
  6682. auto *ArrBase = dyn_cast<SCEVUnknown>(getPointerBase(AddRec));
  6683. auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*this));
  6684. if (!ArrBase || !Step)
  6685. continue;
  6686. assert(isLoopInvariant(ArrBase, L) && "See addrec definition");
  6687. // Only handle { %array + step },
  6688. // FIXME: {(SCEVAddRecExpr) + step } could not be analysed here.
  6689. if (AddRec->getStart() != ArrBase)
  6690. continue;
6691. // Skip memory operation patterns that have gaps,
6692. // that repeat the same memory operation,
6693. // or whose GEP index wraps around.
  6694. if (Step->getAPInt().getActiveBits() > 32 ||
  6695. Step->getAPInt().getZExtValue() !=
  6696. ElemSize->getAPInt().getZExtValue() ||
  6697. Step->isZero() || Step->getAPInt().isNegative())
  6698. continue;
6699. // Only infer from a stack array that has a known, fixed size.
6700. // Make sure the alloca instruction is not executed inside the loop.
  6701. AllocaInst *AllocateInst = dyn_cast<AllocaInst>(ArrBase->getValue());
  6702. if (!AllocateInst || L->contains(AllocateInst->getParent()))
  6703. continue;
6704. // Make sure we only handle an ordinary array.
  6705. auto *Ty = dyn_cast<ArrayType>(AllocateInst->getAllocatedType());
  6706. auto *ArrSize = dyn_cast<ConstantInt>(AllocateInst->getArraySize());
  6707. if (!Ty || !ArrSize || !ArrSize->isOne())
  6708. continue;
6709. // FIXME: Since GEP indices are silently zero-extended to the indexing
6710. // type, a narrow GEP index may wrap around rather than increase
6711. // strictly; we should ensure that the step increases strictly with each
6712. // loop iteration.
  6713. // Now we can infer a max execution time by MemLength/StepLength.
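// Illustrative example: for "int a[100]" (400 bytes) accessed with a 4-byte
// step, MaxExeCount below is ceil(400 / 4) = 100 and InferCount is 101,
// accounting for the final entry into the loop header that does not access
// the array.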
  6714. const SCEV *MemSize =
  6715. getConstant(Step->getType(), DL.getTypeAllocSize(Ty));
  6716. auto *MaxExeCount =
  6717. dyn_cast<SCEVConstant>(getUDivCeilSCEV(MemSize, Step));
  6718. if (!MaxExeCount || MaxExeCount->getAPInt().getActiveBits() > 32)
  6719. continue;
6720. // If the loop reaches the maximum number of executions, we cannot
6721. // access bytes starting outside the statically allocated size without
6722. // immediate UB. However, the loop header may still be entered one more
6723. // time.
  6724. auto *InferCount = dyn_cast<SCEVConstant>(
  6725. getAddExpr(MaxExeCount, getOne(MaxExeCount->getType())));
6726. // Discard the inferred trip count if it does not fit into 32 bits.
  6727. if (!InferCount || InferCount->getAPInt().getActiveBits() > 32)
  6728. continue;
  6729. InferCountColl.push_back(InferCount);
  6730. }
  6731. }
  6732. if (InferCountColl.size() == 0)
  6733. return getCouldNotCompute();
  6734. return getUMinFromMismatchedTypes(InferCountColl);
  6735. }
  6736. unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
  6737. SmallVector<BasicBlock *, 8> ExitingBlocks;
  6738. L->getExitingBlocks(ExitingBlocks);
  6739. Optional<unsigned> Res = None;
  6740. for (auto *ExitingBB : ExitingBlocks) {
  6741. unsigned Multiple = getSmallConstantTripMultiple(L, ExitingBB);
  6742. if (!Res)
  6743. Res = Multiple;
  6744. Res = (unsigned)GreatestCommonDivisor64(*Res, Multiple);
  6745. }
  6746. return Res.getValueOr(1);
  6747. }
  6748. unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
  6749. const SCEV *ExitCount) {
  6750. if (ExitCount == getCouldNotCompute())
  6751. return 1;
  6752. // Get the trip count
  6753. const SCEV *TCExpr = getTripCountFromExitCount(ExitCount);
  6754. const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr);
  6755. if (!TC)
  6756. // Attempt to factor more general cases. Returns the greatest power of
  6757. // two divisor. If overflow happens, the trip count expression is still
  6758. // divisible by the greatest power of 2 divisor returned.
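// Illustrative example: if TCExpr is (8 * %n) and nothing more is known about
// %n, GetMinTrailingZeros returns 3 and the reported multiple is 1 << 3 = 8.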
  6759. return 1U << std::min((uint32_t)31,
  6760. GetMinTrailingZeros(applyLoopGuards(TCExpr, L)));
  6761. ConstantInt *Result = TC->getValue();
  6762. // Guard against huge trip counts (this requires checking
  6763. // for zero to handle the case where the trip count == -1 and the
  6764. // addition wraps).
  6765. if (!Result || Result->getValue().getActiveBits() > 32 ||
  6766. Result->getValue().getActiveBits() == 0)
  6767. return 1;
  6768. return (unsigned)Result->getZExtValue();
  6769. }
  6770. /// Returns the largest constant divisor of the trip count of this loop as a
  6771. /// normal unsigned value, if possible. This means that the actual trip count is
  6772. /// always a multiple of the returned value (don't forget the trip count could
  6773. /// very well be zero as well!).
  6774. ///
  6775. /// Returns 1 if the trip count is unknown or not guaranteed to be the
  6776. /// multiple of a constant (which is also the case if the trip count is simply
6777. /// constant; use getSmallConstantTripCount for that case). It will also return 1
  6778. /// if the trip count is very large (>= 2^32).
  6779. ///
  6780. /// As explained in the comments for getSmallConstantTripCount, this assumes
  6781. /// that control exits the loop via ExitingBlock.
  6782. unsigned
  6783. ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
  6784. const BasicBlock *ExitingBlock) {
  6785. assert(ExitingBlock && "Must pass a non-null exiting block!");
  6786. assert(L->isLoopExiting(ExitingBlock) &&
  6787. "Exiting block must actually branch out of the loop!");
  6788. const SCEV *ExitCount = getExitCount(L, ExitingBlock);
  6789. return getSmallConstantTripMultiple(L, ExitCount);
  6790. }
  6791. const SCEV *ScalarEvolution::getExitCount(const Loop *L,
  6792. const BasicBlock *ExitingBlock,
  6793. ExitCountKind Kind) {
  6794. switch (Kind) {
  6795. case Exact:
  6796. case SymbolicMaximum:
  6797. return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
  6798. case ConstantMaximum:
  6799. return getBackedgeTakenInfo(L).getConstantMax(ExitingBlock, this);
  6800. };
  6801. llvm_unreachable("Invalid ExitCountKind!");
  6802. }
  6803. const SCEV *
  6804. ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L,
  6805. SCEVUnionPredicate &Preds) {
  6806. return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds);
  6807. }
  6808. const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L,
  6809. ExitCountKind Kind) {
  6810. switch (Kind) {
  6811. case Exact:
  6812. return getBackedgeTakenInfo(L).getExact(L, this);
  6813. case ConstantMaximum:
  6814. return getBackedgeTakenInfo(L).getConstantMax(this);
  6815. case SymbolicMaximum:
  6816. return getBackedgeTakenInfo(L).getSymbolicMax(L, this);
  6817. };
  6818. llvm_unreachable("Invalid ExitCountKind!");
  6819. }
  6820. bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
  6821. return getBackedgeTakenInfo(L).isConstantMaxOrZero(this);
  6822. }
  6823. /// Push PHI nodes in the header of the given loop onto the given Worklist.
  6824. static void PushLoopPHIs(const Loop *L,
  6825. SmallVectorImpl<Instruction *> &Worklist,
  6826. SmallPtrSetImpl<Instruction *> &Visited) {
  6827. BasicBlock *Header = L->getHeader();
  6828. // Push all Loop-header PHIs onto the Worklist stack.
  6829. for (PHINode &PN : Header->phis())
  6830. if (Visited.insert(&PN).second)
  6831. Worklist.push_back(&PN);
  6832. }
  6833. const ScalarEvolution::BackedgeTakenInfo &
  6834. ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
  6835. auto &BTI = getBackedgeTakenInfo(L);
  6836. if (BTI.hasFullInfo())
  6837. return BTI;
  6838. auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
  6839. if (!Pair.second)
  6840. return Pair.first->second;
  6841. BackedgeTakenInfo Result =
  6842. computeBackedgeTakenCount(L, /*AllowPredicates=*/true);
  6843. return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result);
  6844. }
  6845. ScalarEvolution::BackedgeTakenInfo &
  6846. ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
  6847. // Initially insert an invalid entry for this loop. If the insertion
  6848. // succeeds, proceed to actually compute a backedge-taken count and
  6849. // update the value. The temporary CouldNotCompute value tells SCEV
  6850. // code elsewhere that it shouldn't attempt to request a new
  6851. // backedge-taken count, which could result in infinite recursion.
  6852. std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
  6853. BackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
  6854. if (!Pair.second)
  6855. return Pair.first->second;
  6856. // computeBackedgeTakenCount may allocate memory for its result. Inserting it
  6857. // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
  6858. // must be cleared in this scope.
  6859. BackedgeTakenInfo Result = computeBackedgeTakenCount(L);
6860. // In a release build without statistics, these counters are unused.
  6861. (void)NumTripCountsComputed;
  6862. (void)NumTripCountsNotComputed;
  6863. #if LLVM_ENABLE_STATS || !defined(NDEBUG)
  6864. const SCEV *BEExact = Result.getExact(L, this);
  6865. if (BEExact != getCouldNotCompute()) {
  6866. assert(isLoopInvariant(BEExact, L) &&
  6867. isLoopInvariant(Result.getConstantMax(this), L) &&
  6868. "Computed backedge-taken count isn't loop invariant for loop!");
  6869. ++NumTripCountsComputed;
  6870. } else if (Result.getConstantMax(this) == getCouldNotCompute() &&
  6871. isa<PHINode>(L->getHeader()->begin())) {
  6872. // Only count loops that have phi nodes as not being computable.
  6873. ++NumTripCountsNotComputed;
  6874. }
  6875. #endif // LLVM_ENABLE_STATS || !defined(NDEBUG)
  6876. // Now that we know more about the trip count for this loop, forget any
  6877. // existing SCEV values for PHI nodes in this loop since they are only
  6878. // conservative estimates made without the benefit of trip count
  6879. // information. This invalidation is not necessary for correctness, and is
  6880. // only done to produce more precise results.
  6881. if (Result.hasAnyInfo()) {
  6882. // Invalidate any expression using an addrec in this loop.
  6883. SmallVector<const SCEV *, 8> ToForget;
  6884. auto LoopUsersIt = LoopUsers.find(L);
  6885. if (LoopUsersIt != LoopUsers.end())
  6886. append_range(ToForget, LoopUsersIt->second);
  6887. forgetMemoizedResults(ToForget);
  6888. // Invalidate constant-evolved loop header phis.
  6889. for (PHINode &PN : L->getHeader()->phis())
  6890. ConstantEvolutionLoopExitValue.erase(&PN);
  6891. }
  6892. // Re-lookup the insert position, since the call to
  6893. // computeBackedgeTakenCount above could result in a
6894. // recursive call to getBackedgeTakenInfo (on a different
  6895. // loop), which would invalidate the iterator computed
  6896. // earlier.
  6897. return BackedgeTakenCounts.find(L)->second = std::move(Result);
  6898. }
  6899. void ScalarEvolution::forgetAllLoops() {
  6900. // This method is intended to forget all info about loops. It should
  6901. // invalidate caches as if the following happened:
  6902. // - The trip counts of all loops have changed arbitrarily
  6903. // - Every llvm::Value has been updated in place to produce a different
  6904. // result.
  6905. BackedgeTakenCounts.clear();
  6906. PredicatedBackedgeTakenCounts.clear();
  6907. BECountUsers.clear();
  6908. LoopPropertiesCache.clear();
  6909. ConstantEvolutionLoopExitValue.clear();
  6910. ValueExprMap.clear();
  6911. ValuesAtScopes.clear();
  6912. ValuesAtScopesUsers.clear();
  6913. LoopDispositions.clear();
  6914. BlockDispositions.clear();
  6915. UnsignedRanges.clear();
  6916. SignedRanges.clear();
  6917. ExprValueMap.clear();
  6918. HasRecMap.clear();
  6919. MinTrailingZerosCache.clear();
  6920. PredicatedSCEVRewrites.clear();
  6921. }
  6922. void ScalarEvolution::forgetLoop(const Loop *L) {
  6923. SmallVector<const Loop *, 16> LoopWorklist(1, L);
  6924. SmallVector<Instruction *, 32> Worklist;
  6925. SmallPtrSet<Instruction *, 16> Visited;
  6926. SmallVector<const SCEV *, 16> ToForget;
  6927. // Iterate over all the loops and sub-loops to drop SCEV information.
  6928. while (!LoopWorklist.empty()) {
  6929. auto *CurrL = LoopWorklist.pop_back_val();
  6930. // Drop any stored trip count value.
  6931. forgetBackedgeTakenCounts(CurrL, /* Predicated */ false);
  6932. forgetBackedgeTakenCounts(CurrL, /* Predicated */ true);
  6933. // Drop information about predicated SCEV rewrites for this loop.
  6934. for (auto I = PredicatedSCEVRewrites.begin();
  6935. I != PredicatedSCEVRewrites.end();) {
  6936. std::pair<const SCEV *, const Loop *> Entry = I->first;
  6937. if (Entry.second == CurrL)
  6938. PredicatedSCEVRewrites.erase(I++);
  6939. else
  6940. ++I;
  6941. }
  6942. auto LoopUsersItr = LoopUsers.find(CurrL);
  6943. if (LoopUsersItr != LoopUsers.end()) {
  6944. ToForget.insert(ToForget.end(), LoopUsersItr->second.begin(),
  6945. LoopUsersItr->second.end());
  6946. LoopUsers.erase(LoopUsersItr);
  6947. }
  6948. // Drop information about expressions based on loop-header PHIs.
  6949. PushLoopPHIs(CurrL, Worklist, Visited);
  6950. while (!Worklist.empty()) {
  6951. Instruction *I = Worklist.pop_back_val();
  6952. ValueExprMapType::iterator It =
  6953. ValueExprMap.find_as(static_cast<Value *>(I));
  6954. if (It != ValueExprMap.end()) {
  6955. eraseValueFromMap(It->first);
  6956. ToForget.push_back(It->second);
  6957. if (PHINode *PN = dyn_cast<PHINode>(I))
  6958. ConstantEvolutionLoopExitValue.erase(PN);
  6959. }
  6960. PushDefUseChildren(I, Worklist, Visited);
  6961. }
  6962. LoopPropertiesCache.erase(CurrL);
  6963. // Forget all contained loops too, to avoid dangling entries in the
  6964. // ValuesAtScopes map.
  6965. LoopWorklist.append(CurrL->begin(), CurrL->end());
  6966. }
  6967. forgetMemoizedResults(ToForget);
  6968. }
  6969. void ScalarEvolution::forgetTopmostLoop(const Loop *L) {
  6970. while (Loop *Parent = L->getParentLoop())
  6971. L = Parent;
  6972. forgetLoop(L);
  6973. }
  6974. void ScalarEvolution::forgetValue(Value *V) {
  6975. Instruction *I = dyn_cast<Instruction>(V);
  6976. if (!I) return;
  6977. // Drop information about expressions based on loop-header PHIs.
  6978. SmallVector<Instruction *, 16> Worklist;
  6979. SmallPtrSet<Instruction *, 8> Visited;
  6980. SmallVector<const SCEV *, 8> ToForget;
  6981. Worklist.push_back(I);
  6982. Visited.insert(I);
  6983. while (!Worklist.empty()) {
  6984. I = Worklist.pop_back_val();
  6985. ValueExprMapType::iterator It =
  6986. ValueExprMap.find_as(static_cast<Value *>(I));
  6987. if (It != ValueExprMap.end()) {
  6988. eraseValueFromMap(It->first);
  6989. ToForget.push_back(It->second);
  6990. if (PHINode *PN = dyn_cast<PHINode>(I))
  6991. ConstantEvolutionLoopExitValue.erase(PN);
  6992. }
  6993. PushDefUseChildren(I, Worklist, Visited);
  6994. }
  6995. forgetMemoizedResults(ToForget);
  6996. }
  6997. void ScalarEvolution::forgetLoopDispositions(const Loop *L) {
  6998. LoopDispositions.clear();
  6999. }
  7000. /// Get the exact loop backedge taken count considering all loop exits. A
  7001. /// computable result can only be returned for loops with all exiting blocks
  7002. /// dominating the latch. howFarToZero assumes that the limit of each loop test
  7003. /// is never skipped. This is a valid assumption as long as the loop exits via
  7004. /// that test. For precise results, it is the caller's responsibility to specify
  7005. /// the relevant loop exiting block using getExact(ExitingBlock, SE).
  7006. const SCEV *
  7007. ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
  7008. SCEVUnionPredicate *Preds) const {
  7009. // If any exits were not computable, the loop is not computable.
  7010. if (!isComplete() || ExitNotTaken.empty())
  7011. return SE->getCouldNotCompute();
  7012. const BasicBlock *Latch = L->getLoopLatch();
  7013. // All exiting blocks we have collected must dominate the only backedge.
  7014. if (!Latch)
  7015. return SE->getCouldNotCompute();
  7016. // All exiting blocks we have gathered dominate loop's latch, so exact trip
  7017. // count is simply a minimum out of all these calculated exit counts.
  7018. SmallVector<const SCEV *, 2> Ops;
  7019. for (auto &ENT : ExitNotTaken) {
  7020. const SCEV *BECount = ENT.ExactNotTaken;
  7021. assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!");
  7022. assert(SE->DT.dominates(ENT.ExitingBlock, Latch) &&
  7023. "We should only have known counts for exiting blocks that dominate "
  7024. "latch!");
  7025. Ops.push_back(BECount);
  7026. if (Preds && !ENT.hasAlwaysTruePredicate())
  7027. Preds->add(ENT.Predicate.get());
  7028. assert((Preds || ENT.hasAlwaysTruePredicate()) &&
  7029. "Predicate should be always true!");
  7030. }
  7031. return SE->getUMinFromMismatchedTypes(Ops);
  7032. }
  7033. /// Get the exact not taken count for this loop exit.
  7034. const SCEV *
  7035. ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock,
  7036. ScalarEvolution *SE) const {
  7037. for (auto &ENT : ExitNotTaken)
  7038. if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
  7039. return ENT.ExactNotTaken;
  7040. return SE->getCouldNotCompute();
  7041. }
  7042. const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
  7043. const BasicBlock *ExitingBlock, ScalarEvolution *SE) const {
  7044. for (auto &ENT : ExitNotTaken)
  7045. if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
  7046. return ENT.MaxNotTaken;
  7047. return SE->getCouldNotCompute();
  7048. }
  7049. /// getConstantMax - Get the constant max backedge taken count for the loop.
  7050. const SCEV *
  7051. ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const {
  7052. auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
  7053. return !ENT.hasAlwaysTruePredicate();
  7054. };
  7055. if (!getConstantMax() || any_of(ExitNotTaken, PredicateNotAlwaysTrue))
  7056. return SE->getCouldNotCompute();
  7057. assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
  7058. isa<SCEVConstant>(getConstantMax())) &&
  7059. "No point in having a non-constant max backedge taken count!");
  7060. return getConstantMax();
  7061. }
  7062. const SCEV *
  7063. ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(const Loop *L,
  7064. ScalarEvolution *SE) {
  7065. if (!SymbolicMax)
  7066. SymbolicMax = SE->computeSymbolicMaxBackedgeTakenCount(L);
  7067. return SymbolicMax;
  7068. }
  7069. bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero(
  7070. ScalarEvolution *SE) const {
  7071. auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
  7072. return !ENT.hasAlwaysTruePredicate();
  7073. };
  7074. return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue);
  7075. }
  7076. ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E)
  7077. : ExitLimit(E, E, false, None) {
  7078. }
  7079. ScalarEvolution::ExitLimit::ExitLimit(
  7080. const SCEV *E, const SCEV *M, bool MaxOrZero,
  7081. ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
  7082. : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
  7083. // If we prove the max count is zero, so is the symbolic bound. This happens
  7084. // in practice due to differences in a) how context sensitive we've chosen
7085. // to be and b) how we reason about bounds implied by UB.
  7086. if (MaxNotTaken->isZero())
  7087. ExactNotTaken = MaxNotTaken;
  7088. assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
  7089. !isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
  7090. "Exact is not allowed to be less precise than Max");
  7091. assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
  7092. isa<SCEVConstant>(MaxNotTaken)) &&
  7093. "No point in having a non-constant max backedge taken count!");
  7094. for (auto *PredSet : PredSetList)
  7095. for (auto *P : *PredSet)
  7096. addPredicate(P);
  7097. assert((isa<SCEVCouldNotCompute>(E) || !E->getType()->isPointerTy()) &&
  7098. "Backedge count should be int");
  7099. assert((isa<SCEVCouldNotCompute>(M) || !M->getType()->isPointerTy()) &&
  7100. "Max backedge count should be int");
  7101. }
  7102. ScalarEvolution::ExitLimit::ExitLimit(
  7103. const SCEV *E, const SCEV *M, bool MaxOrZero,
  7104. const SmallPtrSetImpl<const SCEVPredicate *> &PredSet)
  7105. : ExitLimit(E, M, MaxOrZero, {&PredSet}) {
  7106. }
  7107. ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
  7108. bool MaxOrZero)
  7109. : ExitLimit(E, M, MaxOrZero, None) {
  7110. }
  7111. /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
  7112. /// computable exit into a persistent ExitNotTakenInfo array.
  7113. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
  7114. ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo> ExitCounts,
  7115. bool IsComplete, const SCEV *ConstantMax, bool MaxOrZero)
  7116. : ConstantMax(ConstantMax), IsComplete(IsComplete), MaxOrZero(MaxOrZero) {
  7117. using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
  7118. ExitNotTaken.reserve(ExitCounts.size());
  7119. std::transform(
  7120. ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken),
  7121. [&](const EdgeExitInfo &EEI) {
  7122. BasicBlock *ExitBB = EEI.first;
  7123. const ExitLimit &EL = EEI.second;
  7124. if (EL.Predicates.empty())
  7125. return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
  7126. nullptr);
  7127. std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate);
  7128. for (auto *Pred : EL.Predicates)
  7129. Predicate->add(Pred);
  7130. return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
  7131. std::move(Predicate));
  7132. });
  7133. assert((isa<SCEVCouldNotCompute>(ConstantMax) ||
  7134. isa<SCEVConstant>(ConstantMax)) &&
  7135. "No point in having a non-constant max backedge taken count!");
  7136. }
  7137. /// Compute the number of times the backedge of the specified loop will execute.
  7138. ScalarEvolution::BackedgeTakenInfo
  7139. ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
  7140. bool AllowPredicates) {
  7141. SmallVector<BasicBlock *, 8> ExitingBlocks;
  7142. L->getExitingBlocks(ExitingBlocks);
  7143. using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
  7144. SmallVector<EdgeExitInfo, 4> ExitCounts;
  7145. bool CouldComputeBECount = true;
  7146. BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
  7147. const SCEV *MustExitMaxBECount = nullptr;
  7148. const SCEV *MayExitMaxBECount = nullptr;
  7149. bool MustExitMaxOrZero = false;
  7150. // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
  7151. // and compute maxBECount.
  7152. // Do a union of all the predicates here.
  7153. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
  7154. BasicBlock *ExitBB = ExitingBlocks[i];
  7155. // We canonicalize untaken exits to br (constant), ignore them so that
  7156. // proving an exit untaken doesn't negatively impact our ability to reason
7157. // about the loop as a whole.
  7158. if (auto *BI = dyn_cast<BranchInst>(ExitBB->getTerminator()))
  7159. if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
  7160. bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
  7161. if (ExitIfTrue == CI->isZero())
  7162. continue;
  7163. }
  7164. ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);
  7165. assert((AllowPredicates || EL.Predicates.empty()) &&
  7166. "Predicated exit limit when predicates are not allowed!");
  7167. // 1. For each exit that can be computed, add an entry to ExitCounts.
  7168. // CouldComputeBECount is true only if all exits can be computed.
  7169. if (EL.ExactNotTaken == getCouldNotCompute())
  7170. // We couldn't compute an exact value for this exit, so
  7171. // we won't be able to compute an exact value for the loop.
  7172. CouldComputeBECount = false;
  7173. else
  7174. ExitCounts.emplace_back(ExitBB, EL);
  7175. // 2. Derive the loop's MaxBECount from each exit's max number of
  7176. // non-exiting iterations. Partition the loop exits into two kinds:
  7177. // LoopMustExits and LoopMayExits.
  7178. //
  7179. // If the exit dominates the loop latch, it is a LoopMustExit otherwise it
  7180. // is a LoopMayExit. If any computable LoopMustExit is found, then
  7181. // MaxBECount is the minimum EL.MaxNotTaken of computable
  7182. // LoopMustExits. Otherwise, MaxBECount is conservatively the maximum
  7183. // EL.MaxNotTaken, where CouldNotCompute is considered greater than any
  7184. // computable EL.MaxNotTaken.
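// For example (illustrative), if one exit dominating the latch has a
// computable max of 10 and another exit's max is CouldNotCompute, the loop's
// MaxBECount is still 10: the dominating exit bounds every path through the
// latch.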
    if (EL.MaxNotTaken != getCouldNotCompute() && Latch &&
        DT.dominates(ExitBB, Latch)) {
      if (!MustExitMaxBECount) {
        MustExitMaxBECount = EL.MaxNotTaken;
        MustExitMaxOrZero = EL.MaxOrZero;
      } else {
        MustExitMaxBECount =
            getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken);
      }
    } else if (MayExitMaxBECount != getCouldNotCompute()) {
      if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute())
        MayExitMaxBECount = EL.MaxNotTaken;
      else {
        MayExitMaxBECount =
            getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken);
      }
    }
  }
  const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
    (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
  // The loop backedge will be taken the maximum or zero times if there's
  // a single exit that must be taken the maximum or zero times.
  bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);

  // Remember which SCEVs are used in exit limits for invalidation purposes.
  // We only care about non-constant SCEVs here, so we can ignore EL.MaxNotTaken
  // and MaxBECount, which must be SCEVConstant.
  for (const auto &Pair : ExitCounts)
    if (!isa<SCEVConstant>(Pair.second.ExactNotTaken))
      BECountUsers[Pair.second.ExactNotTaken].insert({L, AllowPredicates});

  return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
                           MaxBECount, MaxOrZero);
}

ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
                                  bool AllowPredicates) {
  assert(L->contains(ExitingBlock) && "Exit count for non-loop block?");
  // If our exiting block does not dominate the latch, then its connection with
  // loop's exit limit may be far from trivial.
  const BasicBlock *Latch = L->getLoopLatch();
  if (!Latch || !DT.dominates(ExitingBlock, Latch))
    return getCouldNotCompute();

  bool IsOnlyExit = (L->getExitingBlock() != nullptr);
  Instruction *Term = ExitingBlock->getTerminator();
  if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
    assert(BI->isConditional() && "If unconditional, it can't be in loop!");
    bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
    assert(ExitIfTrue == L->contains(BI->getSuccessor(1)) &&
           "It should have one successor in loop and one exit block!");
    // Proceed to the next level to examine the exit condition expression.
    return computeExitLimitFromCond(
        L, BI->getCondition(), ExitIfTrue,
        /*ControlsExit=*/IsOnlyExit, AllowPredicates);
  }

  if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
    // For switch, make sure that there is a single exit from the loop.
    BasicBlock *Exit = nullptr;
    for (auto *SBB : successors(ExitingBlock))
      if (!L->contains(SBB)) {
        if (Exit) // Multiple exit successors.
          return getCouldNotCompute();
        Exit = SBB;
      }
    assert(Exit && "Exiting block must have at least one exit");
    return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
                                                /*ControlsExit=*/IsOnlyExit);
  }

  return getCouldNotCompute();
}

ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond(
    const Loop *L, Value *ExitCond, bool ExitIfTrue,
    bool ControlsExit, bool AllowPredicates) {
  ScalarEvolution::ExitLimitCacheTy Cache(L, ExitIfTrue, AllowPredicates);
  return computeExitLimitFromCondCached(Cache, L, ExitCond, ExitIfTrue,
                                        ControlsExit, AllowPredicates);
}

Optional<ScalarEvolution::ExitLimit>
ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
                                      bool ExitIfTrue, bool ControlsExit,
                                      bool AllowPredicates) {
  (void)this->L;
  (void)this->ExitIfTrue;
  (void)this->AllowPredicates;

  assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
         this->AllowPredicates == AllowPredicates &&
         "Variance in assumed invariant key components!");
  auto Itr = TripCountMap.find({ExitCond, ControlsExit});
  if (Itr == TripCountMap.end())
    return None;
  return Itr->second;
}

void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond,
                                             bool ExitIfTrue,
                                             bool ControlsExit,
                                             bool AllowPredicates,
                                             const ExitLimit &EL) {
  assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
         this->AllowPredicates == AllowPredicates &&
         "Variance in assumed invariant key components!");

  auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL});
  assert(InsertResult.second && "Expected successful insertion!");
  (void)InsertResult;
  (void)ExitIfTrue;
}

ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached(
    ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
    bool ControlsExit, bool AllowPredicates) {
  if (auto MaybeEL =
          Cache.find(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates))
    return *MaybeEL;

  ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, ExitIfTrue,
                                              ControlsExit, AllowPredicates);
  Cache.insert(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates, EL);
  return EL;
}

ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
    ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
    bool ControlsExit, bool AllowPredicates) {
  // Handle BinOp conditions (And, Or).
  if (auto LimitFromBinOp = computeExitLimitFromCondFromBinOp(
          Cache, L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates))
    return *LimitFromBinOp;

  // With an icmp, it may be feasible to compute an exact backedge-taken count.
  // Proceed to the next level to examine the icmp.
  if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) {
    ExitLimit EL =
        computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit);
    if (EL.hasFullInfo() || !AllowPredicates)
      return EL;

    // Try again, but use SCEV predicates this time.
    return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit,
                                    /*AllowPredicates=*/true);
  }

  // Check for a constant condition. These are normally stripped out by
  // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
  // preserve the CFG and is temporarily leaving constant conditions
  // in place.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
    if (ExitIfTrue == !CI->getZExtValue())
      // The backedge is always taken.
      return getCouldNotCompute();
    else
      // The backedge is never taken.
      return getZero(CI->getType());
  }

  // If we're exiting based on the overflow flag of an x.with.overflow intrinsic
  // with a constant step, we can form an equivalent icmp predicate and figure
  // out how many iterations will be taken before we exit.
  const WithOverflowInst *WO;
  const APInt *C;
  if (match(ExitCond, m_ExtractValue<1>(m_WithOverflowInst(WO))) &&
      match(WO->getRHS(), m_APInt(C))) {
    ConstantRange NWR =
        ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C,
                                             WO->getNoWrapKind());
    CmpInst::Predicate Pred;
    APInt NewRHSC, Offset;
    NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
    if (!ExitIfTrue)
      Pred = ICmpInst::getInversePredicate(Pred);
    auto *LHS = getSCEV(WO->getLHS());
    if (Offset != 0)
      LHS = getAddExpr(LHS, getConstant(Offset));
    auto EL = computeExitLimitFromICmp(L, Pred, LHS, getConstant(NewRHSC),
                                       ControlsExit, AllowPredicates);
    if (EL.hasAnyInfo()) return EL;
  }

  // If it's not an integer or pointer comparison then compute it the hard way.
  return computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
}

Optional<ScalarEvolution::ExitLimit>
ScalarEvolution::computeExitLimitFromCondFromBinOp(
    ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
    bool ControlsExit, bool AllowPredicates) {
  // Check if the controlling expression for this loop is an And or Or.
  Value *Op0, *Op1;
  bool IsAnd = false;
  if (match(ExitCond, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
    IsAnd = true;
  else if (match(ExitCond, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
    IsAnd = false;
  else
    return None;

  // EitherMayExit is true in these two cases:
  //   br (and Op0 Op1), loop, exit
  //   br (or Op0 Op1), exit, loop
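  // In the remaining two forms, "br (and Op0 Op1), exit, loop" and
  // "br (or Op0 Op1), loop, exit", both operands must agree for the loop to
  // exit; that case is handled conservatively further below.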
  bool EitherMayExit = IsAnd ^ ExitIfTrue;
  ExitLimit EL0 = computeExitLimitFromCondCached(Cache, L, Op0, ExitIfTrue,
                                                 ControlsExit && !EitherMayExit,
                                                 AllowPredicates);
  ExitLimit EL1 = computeExitLimitFromCondCached(Cache, L, Op1, ExitIfTrue,
                                                 ControlsExit && !EitherMayExit,
                                                 AllowPredicates);

  // Be robust against unsimplified IR for the form "op i1 X, NeutralElement"
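  // (for "and" the neutral element is true, for "or" it is false; e.g. for an
  // unsimplified "and i1 %x, true" we simply use %x's exit limit below).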
  const Constant *NeutralElement = ConstantInt::get(ExitCond->getType(), IsAnd);
  if (isa<ConstantInt>(Op1))
    return Op1 == NeutralElement ? EL0 : EL1;
  if (isa<ConstantInt>(Op0))
    return Op0 == NeutralElement ? EL1 : EL0;

  const SCEV *BECount = getCouldNotCompute();
  const SCEV *MaxBECount = getCouldNotCompute();
  if (EitherMayExit) {
    // Both conditions must be the same for the loop to continue executing.
    // Choose the less conservative count.
    if (EL0.ExactNotTaken != getCouldNotCompute() &&
        EL1.ExactNotTaken != getCouldNotCompute()) {
      BECount = getUMinFromMismatchedTypes(
          EL0.ExactNotTaken, EL1.ExactNotTaken,
          /*Sequential=*/!isa<BinaryOperator>(ExitCond));

      // If EL0.ExactNotTaken was zero and ExitCond was a short-circuit form,
      // it should have been simplified to zero (see the condition (3) above)
      assert(!isa<BinaryOperator>(ExitCond) || !EL0.ExactNotTaken->isZero() ||
             BECount->isZero());
    }
    if (EL0.MaxNotTaken == getCouldNotCompute())
      MaxBECount = EL1.MaxNotTaken;
    else if (EL1.MaxNotTaken == getCouldNotCompute())
      MaxBECount = EL0.MaxNotTaken;
    else
      MaxBECount = getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
  } else {
    // Both conditions must be the same at the same time for the loop to exit.
    // For now, be conservative.
    if (EL0.ExactNotTaken == EL1.ExactNotTaken)
      BECount = EL0.ExactNotTaken;
  }

  // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
  // to be more aggressive when computing BECount than when computing
  // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
  // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
  // to not.
  if (isa<SCEVCouldNotCompute>(MaxBECount) &&
      !isa<SCEVCouldNotCompute>(BECount))
    MaxBECount = getConstant(getUnsignedRangeMax(BECount));

  return ExitLimit(BECount, MaxBECount, false,
                   { &EL0.Predicates, &EL1.Predicates });
}

ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
                                          ICmpInst *ExitCond,
                                          bool ExitIfTrue,
                                          bool ControlsExit,
                                          bool AllowPredicates) {
  // If the condition was exit on true, convert the condition to exit on false.
  ICmpInst::Predicate Pred;
  if (!ExitIfTrue)
    Pred = ExitCond->getPredicate();
  else
    Pred = ExitCond->getInversePredicate();
  const ICmpInst::Predicate OriginalPred = Pred;

  const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
  const SCEV *RHS = getSCEV(ExitCond->getOperand(1));

  ExitLimit EL = computeExitLimitFromICmp(L, Pred, LHS, RHS, ControlsExit,
                                          AllowPredicates);
  if (EL.hasAnyInfo()) return EL;

  auto *ExhaustiveCount =
      computeExitCountExhaustively(L, ExitCond, ExitIfTrue);

  if (!isa<SCEVCouldNotCompute>(ExhaustiveCount))
    return ExhaustiveCount;

  return computeShiftCompareExitLimit(ExitCond->getOperand(0),
                                      ExitCond->getOperand(1), L, OriginalPred);
}

ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
                                          ICmpInst::Predicate Pred,
                                          const SCEV *LHS, const SCEV *RHS,
                                          bool ControlsExit,
                                          bool AllowPredicates) {
  // Try to evaluate any dependencies out of the loop.
  LHS = getSCEVAtScope(LHS, L);
  RHS = getSCEVAtScope(RHS, L);

  // At this point, we would like to compute how many iterations of the
  // loop the predicate will return true for these inputs.
  if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
    // If there is a loop-invariant, force it into the RHS.
    std::swap(LHS, RHS);
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  bool ControllingFiniteLoop =
      ControlsExit && loopHasNoAbnormalExits(L) && loopIsFiniteByAssumption(L);
  // Simplify the operands before analyzing them.
  (void)SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0,
                             ControllingFiniteLoop);

  // If we have a comparison of a chrec against a constant, try to use value
  // ranges to answer this query.
  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
    if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
      if (AddRec->getLoop() == L) {
        // Form the constant range.
        ConstantRange CompRange =
            ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt());

        const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
        if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
      }

  // If this loop must exit based on this condition (or execute undefined
  // behaviour), and we can prove the test sequence produced must repeat
  // the same values on self-wrap of the IV, then we can infer that IV
  // doesn't self wrap because if it did, we'd have an infinite (undefined)
  // loop.
  if (ControllingFiniteLoop && isLoopInvariant(RHS, L)) {
    // TODO: We can peel off any functions which are invertible *in L*. Loop
    // invariant terms are effectively constants for our purposes here.
    auto *InnerLHS = LHS;
    if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS))
      InnerLHS = ZExt->getOperand();
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(InnerLHS)) {
      auto *StrideC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this));
      if (!AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() &&
          StrideC && StrideC->getAPInt().isPowerOf2()) {
        auto Flags = AR->getNoWrapFlags();
        Flags = setFlags(Flags, SCEV::FlagNW);
        SmallVector<const SCEV*> Operands{AR->operands()};
        Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
        setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
      }
    }
  }

  switch (Pred) {
  case ICmpInst::ICMP_NE: { // while (X != Y)
    // Convert to: while (X-Y != 0)
    if (LHS->getType()->isPointerTy()) {
      LHS = getLosslessPtrToIntExpr(LHS);
      if (isa<SCEVCouldNotCompute>(LHS))
        return LHS;
    }
    if (RHS->getType()->isPointerTy()) {
      RHS = getLosslessPtrToIntExpr(RHS);
      if (isa<SCEVCouldNotCompute>(RHS))
        return RHS;
    }
    ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
                                AllowPredicates);
    if (EL.hasAnyInfo()) return EL;
    break;
  }
  case ICmpInst::ICMP_EQ: { // while (X == Y)
    // Convert to: while (X-Y == 0)
    if (LHS->getType()->isPointerTy()) {
      LHS = getLosslessPtrToIntExpr(LHS);
      if (isa<SCEVCouldNotCompute>(LHS))
        return LHS;
    }
    if (RHS->getType()->isPointerTy()) {
      RHS = getLosslessPtrToIntExpr(RHS);
      if (isa<SCEVCouldNotCompute>(RHS))
        return RHS;
    }
    ExitLimit EL = howFarToNonZero(getMinusSCEV(LHS, RHS), L);
    if (EL.hasAnyInfo()) return EL;
    break;
  }
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_ULT: { // while (X < Y)
    bool IsSigned = Pred == ICmpInst::ICMP_SLT;
    ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit,
                                    AllowPredicates);
    if (EL.hasAnyInfo()) return EL;
    break;
  }
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_UGT: { // while (X > Y)
    bool IsSigned = Pred == ICmpInst::ICMP_SGT;
    ExitLimit EL =
        howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit,
                            AllowPredicates);
    if (EL.hasAnyInfo()) return EL;
    break;
  }
  default:
    break;
  }

  return getCouldNotCompute();
}

ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
                                                      SwitchInst *Switch,
                                                      BasicBlock *ExitingBlock,
                                                      bool ControlsExit) {
  assert(!L->contains(ExitingBlock) && "Not an exiting block!");

  // Give up if the exit is the default dest of a switch.
  if (Switch->getDefaultDest() == ExitingBlock)
    return getCouldNotCompute();

  assert(L->contains(Switch->getDefaultDest()) &&
         "Default case must not exit the loop!");
  const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
  const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));

  // while (X != Y) --> while (X-Y != 0)
  ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
  if (EL.hasAnyInfo())
    return EL;

  return getCouldNotCompute();
}

static ConstantInt *
EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
                                ScalarEvolution &SE) {
  const SCEV *InVal = SE.getConstant(C);
  const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
  assert(isa<SCEVConstant>(Val) &&
         "Evaluation of SCEV at constant didn't fold correctly?");
  return cast<SCEVConstant>(Val)->getValue();
}

ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
    Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) {
  ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV);
  if (!RHS)
    return getCouldNotCompute();

  const BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return getCouldNotCompute();

  const BasicBlock *Predecessor = L->getLoopPredecessor();
  if (!Predecessor)
    return getCouldNotCompute();

  // Return true if V is of the form "LHS `shift_op` <positive constant>".
  // Return LHS in OutLHS and shift_op in OutOpCode.
  auto MatchPositiveShift =
      [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) {
    using namespace PatternMatch;

    ConstantInt *ShiftAmt;
    if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
      OutOpCode = Instruction::LShr;
    else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
      OutOpCode = Instruction::AShr;
    else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
      OutOpCode = Instruction::Shl;
    else
      return false;

    return ShiftAmt->getValue().isStrictlyPositive();
  };

  // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in
  //
  // loop:
  //   %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
  //   %iv.shifted = lshr i32 %iv, <positive constant>
  //
  // Return true on a successful match. Return the corresponding PHI node (%iv
  // above) in PNOut and the opcode of the shift operation in OpCodeOut.
  auto MatchShiftRecurrence =
      [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
    Optional<Instruction::BinaryOps> PostShiftOpCode;

    {
      Instruction::BinaryOps OpC;
      Value *V;

      // If we encounter a shift instruction, "peel off" the shift operation,
      // and remember that we did so. Later when we inspect %iv's backedge
      // value, we will make sure that the backedge value uses the same
      // operation.
      //
      // Note: the peeled shift operation does not have to be the same
      // instruction as the one feeding into the PHI's backedge value. We only
      // really care about it being the same *kind* of shift instruction --
      // that's all that is required for our later inferences to hold.
      if (MatchPositiveShift(LHS, V, OpC)) {
        PostShiftOpCode = OpC;
        LHS = V;
      }
    }

    PNOut = dyn_cast<PHINode>(LHS);
    if (!PNOut || PNOut->getParent() != L->getHeader())
      return false;

    Value *BEValue = PNOut->getIncomingValueForBlock(Latch);
    Value *OpLHS;

    return
        // The backedge value for the PHI node must be a shift by a positive
        // amount
        MatchPositiveShift(BEValue, OpLHS, OpCodeOut) &&

        // of the PHI node itself
        OpLHS == PNOut &&
        // and the kind of shift should match the kind of shift we peeled
        // off, if any.
        (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut);
  };

  PHINode *PN;
  Instruction::BinaryOps OpCode;
  if (!MatchShiftRecurrence(LHS, PN, OpCode))
    return getCouldNotCompute();

  const DataLayout &DL = getDataLayout();

  // The key rationale for this optimization is that for some kinds of shift
  // recurrences, the value of the recurrence "stabilizes" to either 0 or -1
  // within a finite number of iterations. If the condition guarding the
  // backedge (in the sense that the backedge is taken if the condition is true)
  // is false for the value the shift recurrence stabilizes to, then we know
  // that the backedge is taken only a finite number of times.
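  // For instance (illustrative): {%val,ashr,1} walks %val toward 0 if %val is
  // known non-negative and toward -1 if it is known negative, reaching that
  // stable value after at most bitwidth(%val) iterations; once there, further
  // shifts no longer change it.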
  ConstantInt *StableValue = nullptr;
  switch (OpCode) {
  default:
    llvm_unreachable("Impossible case!");

  case Instruction::AShr: {
    // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
    // bitwidth(K) iterations.
    Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
    KnownBits Known = computeKnownBits(FirstValue, DL, 0, &AC,
                                       Predecessor->getTerminator(), &DT);
    auto *Ty = cast<IntegerType>(RHS->getType());
    if (Known.isNonNegative())
      StableValue = ConstantInt::get(Ty, 0);
    else if (Known.isNegative())
      StableValue = ConstantInt::get(Ty, -1, true);
    else
      return getCouldNotCompute();
    break;
  }
  case Instruction::LShr:
  case Instruction::Shl:
    // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
    // stabilize to 0 in at most bitwidth(K) iterations.
    StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
    break;
  }

  auto *Result =
      ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
  assert(Result->getType()->isIntegerTy(1) &&
         "Otherwise cannot be an operand to a branch instruction");

  if (Result->isZeroValue()) {
    unsigned BitWidth = getTypeSizeInBits(RHS->getType());
    const SCEV *UpperBound =
        getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
    return ExitLimit(getCouldNotCompute(), UpperBound, false);
  }

  return getCouldNotCompute();
}

/// Return true if we can constant fold an instruction of the specified type,
/// assuming that all operands were constants.
static bool CanConstantFold(const Instruction *I) {
  if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
      isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
      isa<LoadInst>(I) || isa<ExtractValueInst>(I))
    return true;

  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (const Function *F = CI->getCalledFunction())
      return canConstantFoldCallTo(CI, F);
  return false;
}

/// Determine whether this instruction can constant evolve within this loop
/// assuming its operands can all constant evolve.
static bool canConstantEvolve(Instruction *I, const Loop *L) {
  // An instruction outside of the loop can't be derived from a loop PHI.
  if (!L->contains(I)) return false;

  if (isa<PHINode>(I)) {
    // We don't currently keep track of the control flow needed to evaluate
    // PHIs, so we cannot handle PHIs inside of loops.
    return L->getHeader() == I->getParent();
  }

  // If we won't be able to constant fold this expression even if the operands
  // are constants, bail early.
  return CanConstantFold(I);
}

/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
/// recursing through each instruction operand until reaching a loop header phi.
static PHINode *
getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
                               DenseMap<Instruction *, PHINode *> &PHIMap,
                               unsigned Depth) {
  if (Depth > MaxConstantEvolvingDepth)
    return nullptr;

  // Otherwise, we can evaluate this instruction if all of its operands are
  // constant or derived from a PHI node themselves.
  PHINode *PHI = nullptr;
  for (Value *Op : UseInst->operands()) {
    if (isa<Constant>(Op)) continue;

    Instruction *OpInst = dyn_cast<Instruction>(Op);
    if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;

    PHINode *P = dyn_cast<PHINode>(OpInst);
    if (!P)
      // If this operand is already visited, reuse the prior result.
      // We may have P != PHI if this is the deepest point at which the
      // inconsistent paths meet.
      P = PHIMap.lookup(OpInst);
    if (!P) {
      // Recurse and memoize the results, whether a phi is found or not.
      // This recursive call invalidates pointers into PHIMap.
      P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1);
      PHIMap[OpInst] = P;
    }
    if (!P)
      return nullptr; // Not evolving from PHI
    if (PHI && PHI != P)
      return nullptr; // Evolving from multiple different PHIs.
    PHI = P;
  }
  // This is an expression evolving from a constant PHI!
  return PHI;
}

/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
/// in the loop that V is derived from. We allow arbitrary operations along the
/// way, but the operands of an operation must either be constants or a value
/// derived from a constant PHI. If this expression does not fit with these
/// constraints, return null.
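///
/// For illustration (hypothetical IR), given
///
///   header:
///     %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///     %masked = and i32 %iv, 7
///   latch:
///     %iv.next = add i32 %iv, 1
///
/// getConstantEvolvingPHI(%masked, L) returns %iv: every operand along the
/// way is either a constant or derived from that single header PHI.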
static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || !canConstantEvolve(I, L)) return nullptr;

  if (PHINode *PN = dyn_cast<PHINode>(I))
    return PN;

  // Record non-constant instructions contained by the loop.
  DenseMap<Instruction *, PHINode *> PHIMap;
  return getConstantEvolvingPHIOperands(I, L, PHIMap, 0);
}

/// EvaluateExpression - Given an expression that passes the
/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
/// in the loop has the value PHIVal. If we can't fold this expression for some
/// reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
                                    DenseMap<Instruction *, Constant *> &Vals,
                                    const DataLayout &DL,
                                    const TargetLibraryInfo *TLI) {
  // Convenient constant check, but redundant for recursive calls.
  if (Constant *C = dyn_cast<Constant>(V)) return C;
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return nullptr;

  if (Constant *C = Vals.lookup(I)) return C;

  // An instruction inside the loop depends on a value outside the loop that we
  // weren't given a mapping for, or a value such as a call inside the loop.
  if (!canConstantEvolve(I, L)) return nullptr;

  // An unmapped PHI can be due to a branch or another loop inside this loop,
  // or due to this not being the initial iteration through a loop where we
  // couldn't compute the evolution of this particular PHI last time.
  if (isa<PHINode>(I)) return nullptr;

  std::vector<Constant*> Operands(I->getNumOperands());

  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
    if (!Operand) {
      Operands[i] = dyn_cast<Constant>(I->getOperand(i));
      if (!Operands[i]) return nullptr;
      continue;
    }
    Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
    Vals[Operand] = C;
    if (!C) return nullptr;
    Operands[i] = C;
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(I))
    return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
                                           Operands[1], DL, TLI);
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!LI->isVolatile())
      return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
  }
  return ConstantFoldInstOperands(I, Operands, DL, TLI);
}

// If every incoming value to PN except the one for BB is a specific Constant,
// return that, else return nullptr.
static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) {
  Constant *IncomingVal = nullptr;

  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    if (PN->getIncomingBlock(i) == BB)
      continue;

    auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i));
    if (!CurrentVal)
      return nullptr;

    if (IncomingVal != CurrentVal) {
      if (IncomingVal)
        return nullptr;
      IncomingVal = CurrentVal;
    }
  }

  return IncomingVal;
}

/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence
/// involving constants, fold it.
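///
/// For example (illustrative), for
///   %acc = phi i32 [ 1, %preheader ], [ %acc.next, %latch ]
///   %acc.next = mul i32 %acc, 3
/// with a backedge-taken count of 4, the brute-force evaluation below yields
/// 3^4 = 81 as the value of %acc once the loop has finished.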
Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
                                                   const APInt &BEs,
                                                   const Loop *L) {
  auto I = ConstantEvolutionLoopExitValue.find(PN);
  if (I != ConstantEvolutionLoopExitValue.end())
    return I->second;

  if (BEs.ugt(MaxBruteForceIterations))
    return ConstantEvolutionLoopExitValue[PN] = nullptr; // Not going to evaluate it.

  Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];

  DenseMap<Instruction *, Constant *> CurrentIterVals;
  BasicBlock *Header = L->getHeader();
  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");

  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return nullptr;

  for (PHINode &PHI : Header->phis()) {
    if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
      CurrentIterVals[&PHI] = StartCST;
  }
  if (!CurrentIterVals.count(PN))
    return RetVal = nullptr;

  Value *BEValue = PN->getIncomingValueForBlock(Latch);

  // Execute the loop symbolically to determine the exit value.
  assert(BEs.getActiveBits() < CHAR_BIT * sizeof(unsigned) &&
         "BEs is <= MaxBruteForceIterations which is an 'unsigned'!");

  unsigned NumIterations = BEs.getZExtValue(); // must be in range
  unsigned IterationNum = 0;
  const DataLayout &DL = getDataLayout();
  for (; ; ++IterationNum) {
    if (IterationNum == NumIterations)
      return RetVal = CurrentIterVals[PN]; // Got exit value!

    // Compute the value of the PHIs for the next iteration.
    // EvaluateExpression adds non-phi values to the CurrentIterVals map.
    DenseMap<Instruction *, Constant *> NextIterVals;
    Constant *NextPHI =
        EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
    if (!NextPHI)
      return nullptr; // Couldn't evaluate!
    NextIterVals[PN] = NextPHI;

    bool StoppedEvolving = NextPHI == CurrentIterVals[PN];

    // Also evaluate the other PHI nodes. However, we don't get to stop if we
    // cease to be able to evaluate one of them or if they stop evolving,
    // because that doesn't necessarily prevent us from computing PN.
    SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
    for (const auto &I : CurrentIterVals) {
      PHINode *PHI = dyn_cast<PHINode>(I.first);
      if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
      PHIsToCompute.emplace_back(PHI, I.second);
    }
    // We use two distinct loops because EvaluateExpression may invalidate any
    // iterators into CurrentIterVals.
    for (const auto &I : PHIsToCompute) {
      PHINode *PHI = I.first;
      Constant *&NextPHI = NextIterVals[PHI];
      if (!NextPHI) { // Not already computed.
        Value *BEValue = PHI->getIncomingValueForBlock(Latch);
        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
      }
      if (NextPHI != I.second)
        StoppedEvolving = false;
    }

    // If all entries in CurrentIterVals == NextIterVals then we can stop
    // iterating, the loop can't continue to change.
    if (StoppedEvolving)
      return RetVal = CurrentIterVals[PN];

    CurrentIterVals.swap(NextIterVals);
  }
}

const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
                                                          Value *Cond,
                                                          bool ExitWhen) {
  PHINode *PN = getConstantEvolvingPHI(Cond, L);
  if (!PN) return getCouldNotCompute();

  // If the loop is canonicalized, the PHI will have exactly two entries.
  // That's the only form we support here.
  if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();

  DenseMap<Instruction *, Constant *> CurrentIterVals;
  BasicBlock *Header = L->getHeader();
  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");

  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Should follow from NumIncomingValues == 2!");

  for (PHINode &PHI : Header->phis()) {
    if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
      CurrentIterVals[&PHI] = StartCST;
  }
  if (!CurrentIterVals.count(PN))
    return getCouldNotCompute();
  // Okay, we found a PHI node that defines the trip count of this loop. Execute
  // the loop symbolically to determine when the condition gets a value of
  // "ExitWhen".
  unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
  const DataLayout &DL = getDataLayout();
  for (unsigned IterationNum = 0; IterationNum != MaxIterations; ++IterationNum) {
    auto *CondVal = dyn_cast_or_null<ConstantInt>(
        EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));

    // Couldn't symbolically evaluate.
    if (!CondVal) return getCouldNotCompute();

    if (CondVal->getValue() == uint64_t(ExitWhen)) {
      ++NumBruteForceTripCountsComputed;
      return getConstant(Type::getInt32Ty(getContext()), IterationNum);
    }

    // Update all the PHI nodes for the next iteration.
    DenseMap<Instruction *, Constant *> NextIterVals;

    // Create a list of which PHIs we need to compute. We want to do this before
    // calling EvaluateExpression on them because that may invalidate iterators
    // into CurrentIterVals.
    SmallVector<PHINode *, 8> PHIsToCompute;
    for (const auto &I : CurrentIterVals) {
      PHINode *PHI = dyn_cast<PHINode>(I.first);
      if (!PHI || PHI->getParent() != Header) continue;
      PHIsToCompute.push_back(PHI);
    }
    for (PHINode *PHI : PHIsToCompute) {
      Constant *&NextPHI = NextIterVals[PHI];
      if (NextPHI) continue; // Already computed!

      Value *BEValue = PHI->getIncomingValueForBlock(Latch);
      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
    }
    CurrentIterVals.swap(NextIterVals);
  }

  // Too many iterations were needed to evaluate.
  return getCouldNotCompute();
}

const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
      ValuesAtScopes[V];
  // Check to see if we've folded this expression at this loop before.
  for (auto &LS : Values)
    if (LS.first == L)
      return LS.second ? LS.second : V;

  Values.emplace_back(L, nullptr);

  // Otherwise compute it.
  const SCEV *C = computeSCEVAtScope(V, L);
  for (auto &LS : reverse(ValuesAtScopes[V]))
    if (LS.first == L) {
      LS.second = C;
      if (!isa<SCEVConstant>(C))
        ValuesAtScopesUsers[C].push_back({L, V});
      break;
    }
  return C;
}

/// This builds up a Constant using the ConstantExpr interface. That way, we
/// will return Constants for objects which aren't represented by a
/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
/// Returns NULL if the SCEV isn't representable as a Constant.
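///
/// For illustration (hypothetical input): for the SCEV (4 + @g), where @g is
/// a global (a pointer-typed SCEVUnknown), the scAddExpr case below produces
/// the equivalent of "getelementptr i8, i8* bitcast @g, i64 4" as a
/// ConstantExpr, since pointer additions are expressed as byte offsets.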
static Constant *BuildConstantFromSCEV(const SCEV *V) {
  switch (V->getSCEVType()) {
  case scCouldNotCompute:
  case scAddRecExpr:
    return nullptr;
  case scConstant:
    return cast<SCEVConstant>(V)->getValue();
  case scUnknown:
    return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
  case scSignExtend: {
    const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
    if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
      return ConstantExpr::getSExt(CastOp, SS->getType());
    return nullptr;
  }
  case scZeroExtend: {
    const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
    if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
      return ConstantExpr::getZExt(CastOp, SZ->getType());
    return nullptr;
  }
  case scPtrToInt: {
    const SCEVPtrToIntExpr *P2I = cast<SCEVPtrToIntExpr>(V);
    if (Constant *CastOp = BuildConstantFromSCEV(P2I->getOperand()))
      return ConstantExpr::getPtrToInt(CastOp, P2I->getType());
    return nullptr;
  }
  case scTruncate: {
    const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
    if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
      return ConstantExpr::getTrunc(CastOp, ST->getType());
    return nullptr;
  }
  case scAddExpr: {
    const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
    if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
      if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
        unsigned AS = PTy->getAddressSpace();
        Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
        C = ConstantExpr::getBitCast(C, DestPtrTy);
      }
      for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
        Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
        if (!C2)
          return nullptr;

        // First pointer!
        if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
          unsigned AS = C2->getType()->getPointerAddressSpace();
          std::swap(C, C2);
          Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
          // The offsets have been converted to bytes. We can add bytes to an
          // i8* by GEP with the byte count in the first index.
          C = ConstantExpr::getBitCast(C, DestPtrTy);
        }

        // Don't bother trying to sum two pointers. We probably can't
        // statically compute a load that results from it anyway.
        if (C2->getType()->isPointerTy())
          return nullptr;

        if (C->getType()->isPointerTy()) {
          C = ConstantExpr::getGetElementPtr(Type::getInt8Ty(C->getContext()),
                                             C, C2);
        } else {
          C = ConstantExpr::getAdd(C, C2);
        }
      }
      return C;
    }
    return nullptr;
  }
  case scMulExpr: {
    const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
    if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
      // Don't bother with pointers at all.
      if (C->getType()->isPointerTy())
        return nullptr;
      for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
        Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
        if (!C2 || C2->getType()->isPointerTy())
          return nullptr;
        C = ConstantExpr::getMul(C, C2);
      }
      return C;
    }
    return nullptr;
  }
  case scUDivExpr: {
    const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
    if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
      if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
        if (LHS->getType() == RHS->getType())
          return ConstantExpr::getUDiv(LHS, RHS);
    return nullptr;
  }
  case scSMaxExpr:
  case scUMaxExpr:
  case scSMinExpr:
  case scUMinExpr:
  case scSequentialUMinExpr:
    return nullptr; // TODO: smax, umax, smin, umin, umin_seq.
  }
  llvm_unreachable("Unknown SCEV kind!");
}

const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
  if (isa<SCEVConstant>(V)) return V;

  // If this instruction is evolved from a constant-evolving PHI, compute the
  // exit value from the loop without using SCEVs.
  if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
    if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
      if (PHINode *PN = dyn_cast<PHINode>(I)) {
        const Loop *CurrLoop = this->LI[I->getParent()];
        // Looking for loop exit value.
        if (CurrLoop && CurrLoop->getParentLoop() == L &&
            PN->getParent() == CurrLoop->getHeader()) {
          // Okay, there is no closed form solution for the PHI node. Check
          // to see if the loop that contains it has a known backedge-taken
          // count. If so, we may be able to force computation of the exit
          // value.
          const SCEV *BackedgeTakenCount = getBackedgeTakenCount(CurrLoop);
          // This trivial case can show up in some degenerate cases where
          // the incoming IR has not yet been fully simplified.
          if (BackedgeTakenCount->isZero()) {
            Value *InitValue = nullptr;
            bool MultipleInitValues = false;
            for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
              if (!CurrLoop->contains(PN->getIncomingBlock(i))) {
                if (!InitValue)
                  InitValue = PN->getIncomingValue(i);
                else if (InitValue != PN->getIncomingValue(i)) {
                  MultipleInitValues = true;
                  break;
                }
              }
            }
            if (!MultipleInitValues && InitValue)
              return getSCEV(InitValue);
          }
          // Do we have a loop invariant value flowing around the backedge
          // for a loop which must execute the backedge?
          if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
              isKnownPositive(BackedgeTakenCount) &&
              PN->getNumIncomingValues() == 2) {
            unsigned InLoopPred =
                CurrLoop->contains(PN->getIncomingBlock(0)) ? 0 : 1;
            Value *BackedgeVal = PN->getIncomingValue(InLoopPred);
            if (CurrLoop->isLoopInvariant(BackedgeVal))
              return getSCEV(BackedgeVal);
          }
          if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
            // Okay, we know how many times the containing loop executes. If
            // this is a constant evolving PHI node, get the final value at
            // the specified iteration number.
            Constant *RV = getConstantEvolutionLoopExitValue(
                PN, BTCC->getAPInt(), CurrLoop);
            if (RV) return getSCEV(RV);
          }
        }

        // If there is a single-input Phi, evaluate it at our scope. If we can
        // prove that this replacement does not break LCSSA form, use new value.
        if (PN->getNumOperands() == 1) {
          const SCEV *Input = getSCEV(PN->getOperand(0));
          const SCEV *InputAtScope = getSCEVAtScope(Input, L);
          // TODO: We can generalize it using LI.replacementPreservesLCSSAForm,
          // for the simplest case just support constants.
          if (isa<SCEVConstant>(InputAtScope)) return InputAtScope;
        }
      }

      // Okay, this is an expression that we cannot symbolically evaluate
      // into a SCEV. Check to see if it's possible to symbolically evaluate
      // the arguments into constants, and if so, try to constant propagate the
      // result. This is particularly useful for computing loop exit values.
      if (CanConstantFold(I)) {
        SmallVector<Constant *, 4> Operands;
        bool MadeImprovement = false;
        for (Value *Op : I->operands()) {
          if (Constant *C = dyn_cast<Constant>(Op)) {
            Operands.push_back(C);
            continue;
          }

          // If any of the operands is non-constant and if they are
          // non-integer and non-pointer, don't even try to analyze them
          // with scev techniques.
          if (!isSCEVable(Op->getType()))
            return V;

          const SCEV *OrigV = getSCEV(Op);
          const SCEV *OpV = getSCEVAtScope(OrigV, L);
          MadeImprovement |= OrigV != OpV;

          Constant *C = BuildConstantFromSCEV(OpV);
          if (!C) return V;
          if (C->getType() != Op->getType())
            C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                              Op->getType(),
                                                              false),
                                      C, Op->getType());
          Operands.push_back(C);
        }

        // Check to see if getSCEVAtScope actually made an improvement.
        if (MadeImprovement) {
          Constant *C = nullptr;
          const DataLayout &DL = getDataLayout();
          if (const CmpInst *CI = dyn_cast<CmpInst>(I))
            C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
                                                Operands[1], DL, &TLI);
          else if (const LoadInst *Load = dyn_cast<LoadInst>(I)) {
            if (!Load->isVolatile())
              C = ConstantFoldLoadFromConstPtr(Operands[0], Load->getType(),
                                               DL);
          } else
            C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
          if (!C) return V;
          return getSCEV(C);
        }
      }
    }

    // This is some other type of SCEVUnknown, just return it.
    return V;
  }

  if (isa<SCEVCommutativeExpr>(V) || isa<SCEVSequentialMinMaxExpr>(V)) {
    const auto *Comm = cast<SCEVNAryExpr>(V);
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
      if (OpAtScope != Comm->getOperand(i)) {
        // Okay, at least one of these operands is loop variant but might be
        // foldable. Build a new instance of the folded commutative expression.
        SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
                                            Comm->op_begin()+i);
        NewOps.push_back(OpAtScope);

        for (++i; i != e; ++i) {
          OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
          NewOps.push_back(OpAtScope);
        }
        if (isa<SCEVAddExpr>(Comm))
          return getAddExpr(NewOps, Comm->getNoWrapFlags());
        if (isa<SCEVMulExpr>(Comm))
          return getMulExpr(NewOps, Comm->getNoWrapFlags());
        if (isa<SCEVMinMaxExpr>(Comm))
          return getMinMaxExpr(Comm->getSCEVType(), NewOps);
        if (isa<SCEVSequentialMinMaxExpr>(Comm))
          return getSequentialMinMaxExpr(Comm->getSCEVType(), NewOps);
        llvm_unreachable("Unknown commutative / sequential min/max SCEV type!");
      }
    }
    // If we got here, all operands are loop invariant.
    return Comm;
  }

  if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
    const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
    const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
    if (LHS == Div->getLHS() && RHS == Div->getRHS())
      return Div; // must be loop invariant
    return getUDivExpr(LHS, RHS);
  }

  // If this is a loop recurrence for a loop that does not contain L, then we
  // are dealing with the final value computed by the loop.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
    // First, attempt to evaluate each operand.
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
      if (OpAtScope == AddRec->getOperand(i))
        continue;

      // Okay, at least one of these operands is loop variant but might be
      // foldable. Build a new instance of the folded commutative expression.
      SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
                                          AddRec->op_begin()+i);
      NewOps.push_back(OpAtScope);
      for (++i; i != e; ++i)
        NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));

      const SCEV *FoldedRec =
          getAddRecExpr(NewOps, AddRec->getLoop(),
                        AddRec->getNoWrapFlags(SCEV::FlagNW));
      AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
      // The addrec may be folded to a nonrecurrence, for example, if the
      // induction variable is multiplied by zero after constant folding. Go
      // ahead and return the folded value.
      if (!AddRec)
        return FoldedRec;
      break;
    }

    // If the scope is outside the addrec's loop, evaluate it by using the
    // loop exit value of the addrec.
    if (!AddRec->getLoop()->contains(L)) {
      // To evaluate this recurrence, we need to know how many times the AddRec
      // loop iterates. Compute this now.
      const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
      if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;

      // Then, evaluate the AddRec.
      return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
    }

    return AddRec;
  }

  if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast; // must be loop invariant
    return getCastExpr(Cast->getSCEVType(), Op, Cast->getType());
  }

  llvm_unreachable("Unknown SCEV type!");
}

const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
  return getSCEVAtScope(getSCEV(V), L);
}
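
/// For illustration: given the SCEV (zext i32 (sext i16 %x to i32) to i64),
/// stripInjectiveFunctions recurses through both extensions and returns the
/// SCEV for %x; since zext and sext are injective and map zero to zero, the
/// stripped expression is zero exactly when the original is.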
const SCEV *ScalarEvolution::stripInjectiveFunctions(const SCEV *S) const {
  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S))
    return stripInjectiveFunctions(ZExt->getOperand());
  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S))
    return stripInjectiveFunctions(SExt->getOperand());
  return S;
}

/// Finds the minimum unsigned root of the following equation:
///
///     A * X = B (mod N)
///
/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
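///
/// Worked example (illustrative): A = 4, B = 8, BW = 8 (so N = 256).
/// D = gcd(A, N) = 4 (Mult2 = 2), and B is divisible by D. The inverse of
/// A/D = 1 modulo N/D = 64 is I = 1, so the minimum root is
/// (I * B mod N) / D = 8 / 4 = 2, and indeed 4 * 2 = 8 (mod 256).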
static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
                                                ScalarEvolution &SE) {
  uint32_t BW = A.getBitWidth();
  assert(BW == SE.getTypeSizeInBits(B->getType()));
  assert(A != 0 && "A must be non-zero.");

  // 1. D = gcd(A, N)
  //
  // The gcd of A and N may have only one prime factor: 2. The number of
  // trailing zeros in A is its multiplicity
  uint32_t Mult2 = A.countTrailingZeros();
  // D = 2^Mult2

  // 2. Check if B is divisible by D.
  //
  // B is divisible by D if and only if the multiplicity of prime factor 2 for B
  // is not less than multiplicity of this prime factor for D.
  if (SE.GetMinTrailingZeros(B) < Mult2)
    return SE.getCouldNotCompute();

  // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
  // modulo (N / D).
  //
  // If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent
  // (N / D) in general. The inverse itself always fits into BW bits, though,
  // so we immediately truncate it.
  APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
  APInt Mod(BW + 1, 0);
  Mod.setBit(BW - Mult2); // Mod = N / D
  APInt I = AD.multiplicativeInverse(Mod).trunc(BW);

  // 4. Compute the minimum unsigned root of the equation:
  //      I * (B / D) mod (N / D)
  // To simplify the computation, we factor out the divide by D:
  //      (I * B mod N) / D
  const SCEV *D = SE.getConstant(APInt::getOneBitSet(BW, Mult2));
  return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
}

/// For a given quadratic addrec, generate coefficients of the corresponding
/// quadratic equation, multiplied by a common value to ensure that they are
/// integers.
/// The returned value is a tuple { A, B, C, M, BitWidth }, where
/// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C
/// were multiplied by, and BitWidth is the bit width of the original addrec
/// coefficients.
/// This function returns None if the addrec coefficients are not compile-
/// time constants.
static Optional<std::tuple<APInt, APInt, APInt, APInt, unsigned>>
GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
  assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
  const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
  const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
  const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
  LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: "
                    << *AddRec << '\n');

  // We currently can only solve this if the coefficients are constants.
  if (!LC || !MC || !NC) {
    LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n");
    return None;
  }

  APInt L = LC->getAPInt();
  APInt M = MC->getAPInt();
  APInt N = NC->getAPInt();
  assert(!N.isZero() && "This is not a quadratic addrec");

  unsigned BitWidth = LC->getAPInt().getBitWidth();
  unsigned NewWidth = BitWidth + 1;
  LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: "
                    << BitWidth << '\n');
  // The sign-extension (as opposed to a zero-extension) here matches the
  // extension used in SolveQuadraticEquationWrap (with the same motivation).
  N = N.sext(NewWidth);
  M = M.sext(NewWidth);
  L = L.sext(NewWidth);

  // The increments are M, M+N, M+2N, ..., so the accumulated values are
  //   L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is,
  //   L+M, L+2M+N, L+3M+3N, ...
  // After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N.
  //
  // The equation Acc = 0 is then
  //   L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0.
  // In a quadratic form it becomes:
  //   N n^2 + (2M-N) n + 2L = 0.
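  //
  // Quick check of the Acc formula (illustrative): for {3,+,5,+,2} the
  // increments are 5, 7, ..., so after two iterations Acc = 3 + 5 + 7 = 15,
  // which matches L + nM + n(n-1)/2 N = 3 + 2*5 + 1*2 = 15.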
  APInt A = N;
  APInt B = 2 * M - A;
  APInt C = 2 * L;
  APInt T = APInt(NewWidth, 2);
  LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B
                    << "x + " << C << ", coeff bw: " << NewWidth
                    << ", multiplied by " << T << '\n');
  return std::make_tuple(A, B, C, T, BitWidth);
}
  8397. /// Helper function to compare optional APInts:
  8398. /// (a) if X and Y both exist, return min(X, Y),
  8399. /// (b) if neither X nor Y exist, return None,
  8400. /// (c) if exactly one of X and Y exists, return that value.
  8401. static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) {
  8402. if (X.hasValue() && Y.hasValue()) {
  8403. unsigned W = std::max(X->getBitWidth(), Y->getBitWidth());
  8404. APInt XW = X->sextOrSelf(W);
  8405. APInt YW = Y->sextOrSelf(W);
  8406. return XW.slt(YW) ? *X : *Y;
  8407. }
  8408. if (!X.hasValue() && !Y.hasValue())
  8409. return None;
  8410. return X.hasValue() ? *X : *Y;
  8411. }
  8412. /// Helper function to truncate an optional APInt to a given BitWidth.
  8413. /// When solving addrec-related equations, it is preferable to return a value
  8414. /// that has the same bit width as the original addrec's coefficients. If the
  8415. /// solution fits in the original bit width, truncate it (except for i1).
  8416. /// Returning a value of a different bit width may inhibit some optimizations.
  8417. ///
  8418. /// In general, a solution to a quadratic equation generated from an addrec
  8419. /// may require BW+1 bits, where BW is the bit width of the addrec's
  8420. /// coefficients. The reason is that the coefficients of the quadratic
  8421. /// equation are BW+1 bits wide (to avoid truncation when converting from
  8422. /// the addrec to the equation).
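///
/// For illustration (hypothetical values): a 9-bit solution equal to 5,
/// obtained for an addrec with 8-bit coefficients, is truncated back to i8,
/// whereas a 9-bit solution equal to 300 does not fit in 8 bits and is
/// returned unchanged.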
  8423. static Optional<APInt> TruncIfPossible(Optional<APInt> X, unsigned BitWidth) {
  8424. if (!X.hasValue())
  8425. return None;
  8426. unsigned W = X->getBitWidth();
  8427. if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth))
  8428. return X->trunc(BitWidth);
  8429. return X;
  8430. }
  8431. /// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n
  8432. /// iterations. The values L, M, N are assumed to be signed, and they
8433. /// should all have the same bit width.
  8434. /// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW,
  8435. /// where BW is the bit width of the addrec's coefficients.
  8436. /// If the calculated value is a BW-bit integer (for BW > 1), it will be
  8437. /// returned as such, otherwise the bit width of the returned value may
  8438. /// be greater than BW.
  8439. ///
  8440. /// This function returns None if
  8441. /// (a) the addrec coefficients are not constant, or
  8442. /// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases
8443. /// like x^2 = 5, no integer solutions exist; in other cases an integer
  8444. /// solution may exist, but SolveQuadraticEquationWrap may fail to find it.
  8445. static Optional<APInt>
  8446. SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
  8447. APInt A, B, C, M;
  8448. unsigned BitWidth;
  8449. auto T = GetQuadraticEquation(AddRec);
  8450. if (!T.hasValue())
  8451. return None;
  8452. std::tie(A, B, C, M, BitWidth) = *T;
  8453. LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n");
  8454. Optional<APInt> X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1);
  8455. if (!X.hasValue())
  8456. return None;
  8457. ConstantInt *CX = ConstantInt::get(SE.getContext(), *X);
  8458. ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE);
  8459. if (!V->isZero())
  8460. return None;
  8461. return TruncIfPossible(X, BitWidth);
  8462. }
  8463. /// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n
  8464. /// iterations. The values M, N are assumed to be signed, and they
8465. /// should both have the same bit width.
  8466. /// Find the least n such that c(n) does not belong to the given range,
  8467. /// while c(n-1) does.
  8468. ///
  8469. /// This function returns None if
  8470. /// (a) the addrec coefficients are not constant, or
  8471. /// (b) SolveQuadraticEquationWrap was unable to find a solution for the
  8472. /// bounds of the range.
  8473. static Optional<APInt>
  8474. SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
  8475. const ConstantRange &Range, ScalarEvolution &SE) {
  8476. assert(AddRec->getOperand(0)->isZero() &&
  8477. "Starting value of addrec should be 0");
  8478. LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range "
  8479. << Range << ", addrec " << *AddRec << '\n');
  8480. // This case is handled in getNumIterationsInRange. Here we can assume that
  8481. // we start in the range.
  8482. assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) &&
  8483. "Addrec's initial value should be in range");
  8484. APInt A, B, C, M;
  8485. unsigned BitWidth;
  8486. auto T = GetQuadraticEquation(AddRec);
  8487. if (!T.hasValue())
  8488. return None;
  8489. // Be careful about the return value: there can be two reasons for not
  8490. // returning an actual number. First, if no solutions to the equations
  8491. // were found, and second, if the solutions don't leave the given range.
  8492. // The first case means that the actual solution is "unknown", the second
  8493. // means that it's known, but not valid. If the solution is unknown, we
  8494. // cannot make any conclusions.
  8495. // Return a pair: the optional solution and a flag indicating if the
  8496. // solution was found.
  8497. auto SolveForBoundary = [&](APInt Bound) -> std::pair<Optional<APInt>,bool> {
8498. // Solve for signed overflow and unsigned overflow, and pick the lower
  8499. // solution.
  8500. LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary "
  8501. << Bound << " (before multiplying by " << M << ")\n");
  8502. Bound *= M; // The quadratic equation multiplier.
  8503. Optional<APInt> SO = None;
  8504. if (BitWidth > 1) {
  8505. LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
  8506. "signed overflow\n");
  8507. SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth);
  8508. }
  8509. LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
  8510. "unsigned overflow\n");
  8511. Optional<APInt> UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound,
  8512. BitWidth+1);
  8513. auto LeavesRange = [&] (const APInt &X) {
  8514. ConstantInt *C0 = ConstantInt::get(SE.getContext(), X);
  8515. ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE);
  8516. if (Range.contains(V0->getValue()))
  8517. return false;
  8518. // X should be at least 1, so X-1 is non-negative.
  8519. ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1);
  8520. ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE);
  8521. if (Range.contains(V1->getValue()))
  8522. return true;
  8523. return false;
  8524. };
  8525. // If SolveQuadraticEquationWrap returns None, it means that there can
  8526. // be a solution, but the function failed to find it. We cannot treat it
  8527. // as "no solution".
  8528. if (!SO.hasValue() || !UO.hasValue())
  8529. return { None, false };
  8530. // Check the smaller value first to see if it leaves the range.
  8531. // At this point, both SO and UO must have values.
  8532. Optional<APInt> Min = MinOptional(SO, UO);
  8533. if (LeavesRange(*Min))
  8534. return { Min, true };
  8535. Optional<APInt> Max = Min == SO ? UO : SO;
  8536. if (LeavesRange(*Max))
  8537. return { Max, true };
  8538. // Solutions were found, but were eliminated, hence the "true".
  8539. return { None, true };
  8540. };
  8541. std::tie(A, B, C, M, BitWidth) = *T;
8542. // Lower bound is inclusive; subtract 1 to represent the exiting value.
  8543. APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1;
  8544. APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth());
  8545. auto SL = SolveForBoundary(Lower);
  8546. auto SU = SolveForBoundary(Upper);
8547. // If either of the solutions was unknown, no meaningful conclusions can
  8548. // be made.
  8549. if (!SL.second || !SU.second)
  8550. return None;
  8551. // Claim: The correct solution is not some value between Min and Max.
  8552. //
  8553. // Justification: Assuming that Min and Max are different values, one of
  8554. // them is when the first signed overflow happens, the other is when the
  8555. // first unsigned overflow happens. Crossing the range boundary is only
  8556. // possible via an overflow (treating 0 as a special case of it, modeling
  8557. // an overflow as crossing k*2^W for some k).
  8558. //
  8559. // The interesting case here is when Min was eliminated as an invalid
  8560. // solution, but Max was not. The argument is that if there was another
  8561. // overflow between Min and Max, it would also have been eliminated if
  8562. // it was considered.
  8563. //
  8564. // For a given boundary, it is possible to have two overflows of the same
  8565. // type (signed/unsigned) without having the other type in between: this
  8566. // can happen when the vertex of the parabola is between the iterations
  8567. // corresponding to the overflows. This is only possible when the two
8568. // overflows cross k*2^W for the same k. In such a case, if the second one
  8569. // left the range (and was the first one to do so), the first overflow
  8570. // would have to enter the range, which would mean that either we had left
  8571. // the range before or that we started outside of it. Both of these cases
  8572. // are contradictions.
  8573. //
  8574. // Claim: In the case where SolveForBoundary returns None, the correct
  8575. // solution is not some value between the Max for this boundary and the
  8576. // Min of the other boundary.
  8577. //
  8578. // Justification: Assume that we had such Max_A and Min_B corresponding
  8579. // to range boundaries A and B and such that Max_A < Min_B. If there was
  8580. // a solution between Max_A and Min_B, it would have to be caused by an
  8581. // overflow corresponding to either A or B. It cannot correspond to B,
  8582. // since Min_B is the first occurrence of such an overflow. If it
  8583. // corresponded to A, it would have to be either a signed or an unsigned
  8584. // overflow that is larger than both eliminated overflows for A. But
  8585. // between the eliminated overflows and this overflow, the values would
  8586. // cover the entire value space, thus crossing the other boundary, which
  8587. // is a contradiction.
  8588. return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth);
  8589. }
  8590. ScalarEvolution::ExitLimit
  8591. ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
  8592. bool AllowPredicates) {
  8593. // This is only used for loops with a "x != y" exit test. The exit condition
  8594. // is now expressed as a single expression, V = x-y. So the exit test is
  8595. // effectively V != 0. We know and take advantage of the fact that this
8596. // expression is only used in a comparison-with-zero context.
  8597. SmallPtrSet<const SCEVPredicate *, 4> Predicates;
  8598. // If the value is a constant
  8599. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
  8600. // If the value is already zero, the branch will execute zero times.
  8601. if (C->getValue()->isZero()) return C;
  8602. return getCouldNotCompute(); // Otherwise it will loop infinitely.
  8603. }
  8604. const SCEVAddRecExpr *AddRec =
  8605. dyn_cast<SCEVAddRecExpr>(stripInjectiveFunctions(V));
  8606. if (!AddRec && AllowPredicates)
  8607. // Try to make this an AddRec using runtime tests, in the first X
  8608. // iterations of this loop, where X is the SCEV expression found by the
  8609. // algorithm below.
  8610. AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates);
  8611. if (!AddRec || AddRec->getLoop() != L)
  8612. return getCouldNotCompute();
  8613. // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
  8614. // the quadratic equation to solve it.
  8615. if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
  8616. // We can only use this value if the chrec ends up with an exact zero
  8617. // value at this index. When solving for "X*X != 5", for example, we
  8618. // should not accept a root of 2.
  8619. if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) {
  8620. const auto *R = cast<SCEVConstant>(getConstant(S.getValue()));
  8621. return ExitLimit(R, R, false, Predicates);
  8622. }
  8623. return getCouldNotCompute();
  8624. }
  8625. // Otherwise we can only handle this if it is affine.
  8626. if (!AddRec->isAffine())
  8627. return getCouldNotCompute();
  8628. // If this is an affine expression, the execution count of this branch is
  8629. // the minimum unsigned root of the following equation:
  8630. //
  8631. // Start + Step*N = 0 (mod 2^BW)
  8632. //
  8633. // equivalent to:
  8634. //
  8635. // Step*N = -Start (mod 2^BW)
  8636. //
  8637. // where BW is the common bit width of Start and Step.
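//
// For illustration (made-up values, not tied to a particular caller): with
// i8 operands, {10,+,-2} satisfies -2*N = -10 (mod 256) at N = 5, whereas
// for {3,+,2} the congruence 2*N = -3 (mod 256) has no solution, because
// the left-hand side is always even while -3 is odd modulo 256.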
  8638. // Get the initial value for the loop.
  8639. const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
  8640. const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
  8641. // For now we handle only constant steps.
  8642. //
  8643. // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
  8644. // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
8645. // to 0; it must be counting down to equal 0. Consequently, N = Start / -Step.
  8646. // We have not yet seen any such cases.
  8647. const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
  8648. if (!StepC || StepC->getValue()->isZero())
  8649. return getCouldNotCompute();
  8650. // For positive steps (counting up until unsigned overflow):
  8651. // N = -Start/Step (as unsigned)
  8652. // For negative steps (counting down to zero):
  8653. // N = Start/-Step
  8654. // First compute the unsigned distance from zero in the direction of Step.
  8655. bool CountDown = StepC->getAPInt().isNegative();
  8656. const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
8657. // Handle unitary steps, which cannot wrap around.
  8658. // 1*N = -Start; -1*N = Start (mod 2^BW), so:
  8659. // N = Distance (as unsigned)
  8660. if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
  8661. APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L));
  8662. MaxBECount = APIntOps::umin(MaxBECount, getUnsignedRangeMax(Distance));
  8663. // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated,
  8664. // we end up with a loop whose backedge-taken count is n - 1. Detect this
  8665. // case, and see if we can improve the bound.
  8666. //
  8667. // Explicitly handling this here is necessary because getUnsignedRange
  8668. // isn't context-sensitive; it doesn't know that we only care about the
  8669. // range inside the loop.
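//
// For illustration (a sketch of the rotated form, not from a real test):
// "if (n != 0) { i = 0; do { /* body */ ++i; } while (i != n); }" gives
// Distance = n - 1 here, and the entry guard "n != 0" is exactly the
// "Distance + 1 != 0" condition tested below, so MaxBECount can be capped
// at unsigned_max(n) - 1.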
  8670. const SCEV *Zero = getZero(Distance->getType());
  8671. const SCEV *One = getOne(Distance->getType());
  8672. const SCEV *DistancePlusOne = getAddExpr(Distance, One);
  8673. if (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, DistancePlusOne, Zero)) {
  8674. // If Distance + 1 doesn't overflow, we can compute the maximum distance
  8675. // as "unsigned_max(Distance + 1) - 1".
  8676. ConstantRange CR = getUnsignedRange(DistancePlusOne);
  8677. MaxBECount = APIntOps::umin(MaxBECount, CR.getUnsignedMax() - 1);
  8678. }
  8679. return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates);
  8680. }
  8681. // If the condition controls loop exit (the loop exits only if the expression
8682. // is true) and the addition is no-wrap, we can use an unsigned divide to
  8683. // compute the backedge count. In this case, the step may not divide the
  8684. // distance, but we don't care because if the condition is "missed" the loop
  8685. // will have undefined behavior due to wrapping.
  8686. if (ControlsExit && AddRec->hasNoSelfWrap() &&
  8687. loopHasNoAbnormalExits(AddRec->getLoop())) {
  8688. const SCEV *Exact =
  8689. getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
  8690. const SCEV *Max = getCouldNotCompute();
  8691. if (Exact != getCouldNotCompute()) {
  8692. APInt MaxInt = getUnsignedRangeMax(applyLoopGuards(Exact, L));
  8693. Max = getConstant(APIntOps::umin(MaxInt, getUnsignedRangeMax(Exact)));
  8694. }
  8695. return ExitLimit(Exact, Max, false, Predicates);
  8696. }
  8697. // Solve the general equation.
  8698. const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(),
  8699. getNegativeSCEV(Start), *this);
  8700. const SCEV *M = E;
  8701. if (E != getCouldNotCompute()) {
  8702. APInt MaxWithGuards = getUnsignedRangeMax(applyLoopGuards(E, L));
  8703. M = getConstant(APIntOps::umin(MaxWithGuards, getUnsignedRangeMax(E)));
  8704. }
  8705. return ExitLimit(E, M, false, Predicates);
  8706. }
  8707. ScalarEvolution::ExitLimit
  8708. ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) {
  8709. // Loops that look like: while (X == 0) are very strange indeed. We don't
  8710. // handle them yet except for the trivial case. This could be expanded in the
  8711. // future as needed.
  8712. // If the value is a constant, check to see if it is known to be non-zero
  8713. // already. If so, the backedge will execute zero times.
  8714. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
  8715. if (!C->getValue()->isZero())
  8716. return getZero(C->getType());
  8717. return getCouldNotCompute(); // Otherwise it will loop infinitely.
  8718. }
  8719. // We could implement others, but I really doubt anyone writes loops like
  8720. // this, and if they did, they would already be constant folded.
  8721. return getCouldNotCompute();
  8722. }
  8723. std::pair<const BasicBlock *, const BasicBlock *>
  8724. ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB)
  8725. const {
  8726. // If the block has a unique predecessor, then there is no path from the
  8727. // predecessor to the block that does not go through the direct edge
  8728. // from the predecessor to the block.
  8729. if (const BasicBlock *Pred = BB->getSinglePredecessor())
  8730. return {Pred, BB};
  8731. // A loop's header is defined to be a block that dominates the loop.
  8732. // If the header has a unique predecessor outside the loop, it must be
  8733. // a block that has exactly one successor that can reach the loop.
  8734. if (const Loop *L = LI.getLoopFor(BB))
  8735. return {L->getLoopPredecessor(), L->getHeader()};
  8736. return {nullptr, nullptr};
  8737. }
  8738. /// SCEV structural equivalence is usually sufficient for testing whether two
8739. /// expressions are equal; however, for the purposes of looking for a condition
  8740. /// guarding a loop, it can be useful to be a little more general, since a
  8741. /// front-end may have replicated the controlling expression.
  8742. static bool HasSameValue(const SCEV *A, const SCEV *B) {
  8743. // Quick check to see if they are the same SCEV.
  8744. if (A == B) return true;
  8745. auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) {
  8746. // Not all instructions that are "identical" compute the same value. For
  8747. // instance, two distinct alloca instructions allocating the same type are
8748. // identical and do not read memory, but they compute distinct values.
  8749. return A->isIdenticalTo(B) && (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A));
  8750. };
  8751. // Otherwise, if they're both SCEVUnknown, it's possible that they hold
  8752. // two different instructions with the same value. Check for this case.
  8753. if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
  8754. if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
  8755. if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
  8756. if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
  8757. if (ComputesEqualValues(AI, BI))
  8758. return true;
  8759. // Otherwise assume they may have a different value.
  8760. return false;
  8761. }
  8762. bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
  8763. const SCEV *&LHS, const SCEV *&RHS,
  8764. unsigned Depth,
  8765. bool ControllingFiniteLoop) {
  8766. bool Changed = false;
  8767. // Simplifies ICMP to trivial true or false by turning it into '0 == 0' or
  8768. // '0 != 0'.
  8769. auto TrivialCase = [&](bool TriviallyTrue) {
  8770. LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
  8771. Pred = TriviallyTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
  8772. return true;
  8773. };
8774. // If we hit the max recursion limit, bail out.
  8775. if (Depth >= 3)
  8776. return false;
  8777. // Canonicalize a constant to the right side.
  8778. if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
  8779. // Check for both operands constant.
  8780. if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
  8781. if (ConstantExpr::getICmp(Pred,
  8782. LHSC->getValue(),
  8783. RHSC->getValue())->isNullValue())
  8784. return TrivialCase(false);
  8785. else
  8786. return TrivialCase(true);
  8787. }
  8788. // Otherwise swap the operands to put the constant on the right.
  8789. std::swap(LHS, RHS);
  8790. Pred = ICmpInst::getSwappedPredicate(Pred);
  8791. Changed = true;
  8792. }
  8793. // If we're comparing an addrec with a value which is loop-invariant in the
  8794. // addrec's loop, put the addrec on the left. Also make a dominance check,
  8795. // as both operands could be addrecs loop-invariant in each other's loop.
  8796. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
  8797. const Loop *L = AR->getLoop();
  8798. if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
  8799. std::swap(LHS, RHS);
  8800. Pred = ICmpInst::getSwappedPredicate(Pred);
  8801. Changed = true;
  8802. }
  8803. }
  8804. // If there's a constant operand, canonicalize comparisons with boundary
  8805. // cases, and canonicalize *-or-equal comparisons to regular comparisons.
  8806. if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
  8807. const APInt &RA = RC->getAPInt();
  8808. bool SimplifiedByConstantRange = false;
  8809. if (!ICmpInst::isEquality(Pred)) {
  8810. ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
  8811. if (ExactCR.isFullSet())
  8812. return TrivialCase(true);
  8813. else if (ExactCR.isEmptySet())
  8814. return TrivialCase(false);
  8815. APInt NewRHS;
  8816. CmpInst::Predicate NewPred;
  8817. if (ExactCR.getEquivalentICmp(NewPred, NewRHS) &&
  8818. ICmpInst::isEquality(NewPred)) {
  8819. // We were able to convert an inequality to an equality.
  8820. Pred = NewPred;
  8821. RHS = getConstant(NewRHS);
  8822. Changed = SimplifiedByConstantRange = true;
  8823. }
  8824. }
  8825. if (!SimplifiedByConstantRange) {
  8826. switch (Pred) {
  8827. default:
  8828. break;
  8829. case ICmpInst::ICMP_EQ:
  8830. case ICmpInst::ICMP_NE:
  8831. // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
  8832. if (!RA)
  8833. if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
  8834. if (const SCEVMulExpr *ME =
  8835. dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
  8836. if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
  8837. ME->getOperand(0)->isAllOnesValue()) {
  8838. RHS = AE->getOperand(1);
  8839. LHS = ME->getOperand(1);
  8840. Changed = true;
  8841. }
  8842. break;
  8843. // The "Should have been caught earlier!" messages refer to the fact
  8844. // that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
  8845. // should have fired on the corresponding cases, and canonicalized the
8846. // check to a trivial case.
  8847. case ICmpInst::ICMP_UGE:
  8848. assert(!RA.isMinValue() && "Should have been caught earlier!");
  8849. Pred = ICmpInst::ICMP_UGT;
  8850. RHS = getConstant(RA - 1);
  8851. Changed = true;
  8852. break;
  8853. case ICmpInst::ICMP_ULE:
  8854. assert(!RA.isMaxValue() && "Should have been caught earlier!");
  8855. Pred = ICmpInst::ICMP_ULT;
  8856. RHS = getConstant(RA + 1);
  8857. Changed = true;
  8858. break;
  8859. case ICmpInst::ICMP_SGE:
  8860. assert(!RA.isMinSignedValue() && "Should have been caught earlier!");
  8861. Pred = ICmpInst::ICMP_SGT;
  8862. RHS = getConstant(RA - 1);
  8863. Changed = true;
  8864. break;
  8865. case ICmpInst::ICMP_SLE:
  8866. assert(!RA.isMaxSignedValue() && "Should have been caught earlier!");
  8867. Pred = ICmpInst::ICMP_SLT;
  8868. RHS = getConstant(RA + 1);
  8869. Changed = true;
  8870. break;
  8871. }
  8872. }
  8873. }
  8874. // Check for obvious equality.
  8875. if (HasSameValue(LHS, RHS)) {
  8876. if (ICmpInst::isTrueWhenEqual(Pred))
  8877. return TrivialCase(true);
  8878. if (ICmpInst::isFalseWhenEqual(Pred))
  8879. return TrivialCase(false);
  8880. }
  8881. // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
  8882. // adding or subtracting 1 from one of the operands. This can be done for
  8883. // one of two reasons:
  8884. // 1) The range of the RHS does not include the (signed/unsigned) boundaries
  8885. // 2) The loop is finite, with this comparison controlling the exit. Since the
  8886. // loop is finite, the bound cannot include the corresponding boundary
  8887. // (otherwise it would loop forever).
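// For example (illustrative only): "X s<= Y" can be rewritten as "X s< Y+1"
// when Y's signed range excludes SINT_MAX, so the +1 cannot wrap; the other
// cases below are the analogous unsigned and greater-than variants.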
  8888. switch (Pred) {
  8889. case ICmpInst::ICMP_SLE:
  8890. if (ControllingFiniteLoop || !getSignedRangeMax(RHS).isMaxSignedValue()) {
  8891. RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
  8892. SCEV::FlagNSW);
  8893. Pred = ICmpInst::ICMP_SLT;
  8894. Changed = true;
  8895. } else if (!getSignedRangeMin(LHS).isMinSignedValue()) {
  8896. LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
  8897. SCEV::FlagNSW);
  8898. Pred = ICmpInst::ICMP_SLT;
  8899. Changed = true;
  8900. }
  8901. break;
  8902. case ICmpInst::ICMP_SGE:
  8903. if (ControllingFiniteLoop || !getSignedRangeMin(RHS).isMinSignedValue()) {
  8904. RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
  8905. SCEV::FlagNSW);
  8906. Pred = ICmpInst::ICMP_SGT;
  8907. Changed = true;
  8908. } else if (!getSignedRangeMax(LHS).isMaxSignedValue()) {
  8909. LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
  8910. SCEV::FlagNSW);
  8911. Pred = ICmpInst::ICMP_SGT;
  8912. Changed = true;
  8913. }
  8914. break;
  8915. case ICmpInst::ICMP_ULE:
  8916. if (ControllingFiniteLoop || !getUnsignedRangeMax(RHS).isMaxValue()) {
  8917. RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
  8918. SCEV::FlagNUW);
  8919. Pred = ICmpInst::ICMP_ULT;
  8920. Changed = true;
  8921. } else if (!getUnsignedRangeMin(LHS).isMinValue()) {
  8922. LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
  8923. Pred = ICmpInst::ICMP_ULT;
  8924. Changed = true;
  8925. }
  8926. break;
  8927. case ICmpInst::ICMP_UGE:
  8928. if (ControllingFiniteLoop || !getUnsignedRangeMin(RHS).isMinValue()) {
  8929. RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
  8930. Pred = ICmpInst::ICMP_UGT;
  8931. Changed = true;
  8932. } else if (!getUnsignedRangeMax(LHS).isMaxValue()) {
  8933. LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
  8934. SCEV::FlagNUW);
  8935. Pred = ICmpInst::ICMP_UGT;
  8936. Changed = true;
  8937. }
  8938. break;
  8939. default:
  8940. break;
  8941. }
  8942. // TODO: More simplifications are possible here.
  8943. // Recursively simplify until we either hit a recursion limit or nothing
  8944. // changes.
  8945. if (Changed)
  8946. return SimplifyICmpOperands(Pred, LHS, RHS, Depth + 1,
  8947. ControllingFiniteLoop);
  8948. return Changed;
  8949. }
  8950. bool ScalarEvolution::isKnownNegative(const SCEV *S) {
  8951. return getSignedRangeMax(S).isNegative();
  8952. }
  8953. bool ScalarEvolution::isKnownPositive(const SCEV *S) {
  8954. return getSignedRangeMin(S).isStrictlyPositive();
  8955. }
  8956. bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
  8957. return !getSignedRangeMin(S).isNegative();
  8958. }
  8959. bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
  8960. return !getSignedRangeMax(S).isStrictlyPositive();
  8961. }
  8962. bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
  8963. return getUnsignedRangeMin(S) != 0;
  8964. }
  8965. std::pair<const SCEV *, const SCEV *>
  8966. ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) {
  8967. // Compute SCEV on entry of loop L.
  8968. const SCEV *Start = SCEVInitRewriter::rewrite(S, L, *this);
  8969. if (Start == getCouldNotCompute())
  8970. return { Start, Start };
  8971. // Compute post increment SCEV for loop L.
  8972. const SCEV *PostInc = SCEVPostIncRewriter::rewrite(S, L, *this);
  8973. assert(PostInc != getCouldNotCompute() && "Unexpected could not compute");
  8974. return { Start, PostInc };
  8975. }
  8976. bool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred,
  8977. const SCEV *LHS, const SCEV *RHS) {
  8978. // First collect all loops.
  8979. SmallPtrSet<const Loop *, 8> LoopsUsed;
  8980. getUsedLoops(LHS, LoopsUsed);
  8981. getUsedLoops(RHS, LoopsUsed);
  8982. if (LoopsUsed.empty())
  8983. return false;
  8984. // Domination relationship must be a linear order on collected loops.
  8985. #ifndef NDEBUG
  8986. for (auto *L1 : LoopsUsed)
  8987. for (auto *L2 : LoopsUsed)
  8988. assert((DT.dominates(L1->getHeader(), L2->getHeader()) ||
  8989. DT.dominates(L2->getHeader(), L1->getHeader())) &&
  8990. "Domination relationship is not a linear order");
  8991. #endif
  8992. const Loop *MDL =
  8993. *std::max_element(LoopsUsed.begin(), LoopsUsed.end(),
  8994. [&](const Loop *L1, const Loop *L2) {
  8995. return DT.properlyDominates(L1->getHeader(), L2->getHeader());
  8996. });
  8997. // Get init and post increment value for LHS.
  8998. auto SplitLHS = SplitIntoInitAndPostInc(MDL, LHS);
8999. // If LHS contains an unknown non-invariant SCEV, bail out.
  9000. if (SplitLHS.first == getCouldNotCompute())
  9001. return false;
  9002. assert (SplitLHS.second != getCouldNotCompute() && "Unexpected CNC");
  9003. // Get init and post increment value for RHS.
  9004. auto SplitRHS = SplitIntoInitAndPostInc(MDL, RHS);
9005. // If RHS contains an unknown non-invariant SCEV, bail out.
  9006. if (SplitRHS.first == getCouldNotCompute())
  9007. return false;
  9008. assert (SplitRHS.second != getCouldNotCompute() && "Unexpected CNC");
9009. // It is possible that the init SCEV contains an invariant load that does
9010. // not dominate MDL and is not available at MDL's loop entry, so we should
9011. // check for that here.
  9012. if (!isAvailableAtLoopEntry(SplitLHS.first, MDL) ||
  9013. !isAvailableAtLoopEntry(SplitRHS.first, MDL))
  9014. return false;
9015. // The backedge guard check seems to be faster than the entry one, so in some
9016. // cases checking it first can short-circuit and speed up the whole estimation.
  9017. return isLoopBackedgeGuardedByCond(MDL, Pred, SplitLHS.second,
  9018. SplitRHS.second) &&
  9019. isLoopEntryGuardedByCond(MDL, Pred, SplitLHS.first, SplitRHS.first);
  9020. }
  9021. bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
  9022. const SCEV *LHS, const SCEV *RHS) {
  9023. // Canonicalize the inputs first.
  9024. (void)SimplifyICmpOperands(Pred, LHS, RHS);
  9025. if (isKnownViaInduction(Pred, LHS, RHS))
  9026. return true;
  9027. if (isKnownPredicateViaSplitting(Pred, LHS, RHS))
  9028. return true;
  9029. // Otherwise see what can be done with some simple reasoning.
  9030. return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS);
  9031. }
  9032. Optional<bool> ScalarEvolution::evaluatePredicate(ICmpInst::Predicate Pred,
  9033. const SCEV *LHS,
  9034. const SCEV *RHS) {
  9035. if (isKnownPredicate(Pred, LHS, RHS))
  9036. return true;
  9037. else if (isKnownPredicate(ICmpInst::getInversePredicate(Pred), LHS, RHS))
  9038. return false;
  9039. return None;
  9040. }
  9041. bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred,
  9042. const SCEV *LHS, const SCEV *RHS,
  9043. const Instruction *CtxI) {
  9044. // TODO: Analyze guards and assumes from Context's block.
  9045. return isKnownPredicate(Pred, LHS, RHS) ||
  9046. isBasicBlockEntryGuardedByCond(CtxI->getParent(), Pred, LHS, RHS);
  9047. }
  9048. Optional<bool> ScalarEvolution::evaluatePredicateAt(ICmpInst::Predicate Pred,
  9049. const SCEV *LHS,
  9050. const SCEV *RHS,
  9051. const Instruction *CtxI) {
  9052. Optional<bool> KnownWithoutContext = evaluatePredicate(Pred, LHS, RHS);
  9053. if (KnownWithoutContext)
  9054. return KnownWithoutContext;
  9055. if (isBasicBlockEntryGuardedByCond(CtxI->getParent(), Pred, LHS, RHS))
  9056. return true;
  9057. else if (isBasicBlockEntryGuardedByCond(CtxI->getParent(),
  9058. ICmpInst::getInversePredicate(Pred),
  9059. LHS, RHS))
  9060. return false;
  9061. return None;
  9062. }
  9063. bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred,
  9064. const SCEVAddRecExpr *LHS,
  9065. const SCEV *RHS) {
  9066. const Loop *L = LHS->getLoop();
  9067. return isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS) &&
  9068. isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS);
  9069. }
  9070. Optional<ScalarEvolution::MonotonicPredicateType>
  9071. ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS,
  9072. ICmpInst::Predicate Pred) {
  9073. auto Result = getMonotonicPredicateTypeImpl(LHS, Pred);
  9074. #ifndef NDEBUG
9075. // Verify an invariant: swapping the predicate should turn a monotonically
9076. // increasing change into a monotonically decreasing one, and vice versa.
  9077. if (Result) {
  9078. auto ResultSwapped =
  9079. getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred));
  9080. assert(ResultSwapped.hasValue() && "should be able to analyze both!");
  9081. assert(ResultSwapped.getValue() != Result.getValue() &&
  9082. "monotonicity should flip as we flip the predicate");
  9083. }
  9084. #endif
  9085. return Result;
  9086. }
  9087. Optional<ScalarEvolution::MonotonicPredicateType>
  9088. ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS,
  9089. ICmpInst::Predicate Pred) {
  9090. // A zero step value for LHS means the induction variable is essentially a
  9091. // loop invariant value. We don't really depend on the predicate actually
  9092. // flipping from false to true (for increasing predicates, and the other way
9093. // around for decreasing predicates); all we care about is that *if* the
9094. // predicate changes, then it only changes from false to true.
  9095. //
  9096. // A zero step value in itself is not very useful, but there may be places
  9097. // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
  9098. // as general as possible.
  9099. // Only handle LE/LT/GE/GT predicates.
  9100. if (!ICmpInst::isRelational(Pred))
  9101. return None;
  9102. bool IsGreater = ICmpInst::isGE(Pred) || ICmpInst::isGT(Pred);
  9103. assert((IsGreater || ICmpInst::isLE(Pred) || ICmpInst::isLT(Pred)) &&
  9104. "Should be greater or less!");
  9105. // Check that AR does not wrap.
  9106. if (ICmpInst::isUnsigned(Pred)) {
  9107. if (!LHS->hasNoUnsignedWrap())
  9108. return None;
  9109. return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
  9110. } else {
  9111. assert(ICmpInst::isSigned(Pred) &&
  9112. "Relational predicate is either signed or unsigned!");
  9113. if (!LHS->hasNoSignedWrap())
  9114. return None;
  9115. const SCEV *Step = LHS->getStepRecurrence(*this);
  9116. if (isKnownNonNegative(Step))
  9117. return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
  9118. if (isKnownNonPositive(Step))
  9119. return !IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
  9120. return None;
  9121. }
  9122. }
  9123. Optional<ScalarEvolution::LoopInvariantPredicate>
  9124. ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred,
  9125. const SCEV *LHS, const SCEV *RHS,
  9126. const Loop *L) {
9127. // If there is a loop-invariant operand, force it into the RHS; otherwise bail out.
  9128. if (!isLoopInvariant(RHS, L)) {
  9129. if (!isLoopInvariant(LHS, L))
  9130. return None;
  9131. std::swap(LHS, RHS);
  9132. Pred = ICmpInst::getSwappedPredicate(Pred);
  9133. }
  9134. const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
  9135. if (!ArLHS || ArLHS->getLoop() != L)
  9136. return None;
  9137. auto MonotonicType = getMonotonicPredicateType(ArLHS, Pred);
  9138. if (!MonotonicType)
  9139. return None;
  9140. // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
  9141. // true as the loop iterates, and the backedge is control dependent on
  9142. // "ArLHS `Pred` RHS" == true then we can reason as follows:
  9143. //
  9144. // * if the predicate was false in the first iteration then the predicate
  9145. // is never evaluated again, since the loop exits without taking the
  9146. // backedge.
  9147. // * if the predicate was true in the first iteration then it will
  9148. // continue to be true for all future iterations since it is
  9149. // monotonically increasing.
  9150. //
  9151. // For both the above possibilities, we can replace the loop varying
  9152. // predicate with its value on the first iteration of the loop (which is
  9153. // loop invariant).
  9154. //
  9155. // A similar reasoning applies for a monotonically decreasing predicate, by
  9156. // replacing true with false and false with true in the above two bullets.
  9157. bool Increasing = *MonotonicType == ScalarEvolution::MonotonicallyIncreasing;
  9158. auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);
  9159. if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
  9160. return None;
  9161. return ScalarEvolution::LoopInvariantPredicate(Pred, ArLHS->getStart(), RHS);
  9162. }
  9163. Optional<ScalarEvolution::LoopInvariantPredicate>
  9164. ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations(
  9165. ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
  9166. const Instruction *CtxI, const SCEV *MaxIter) {
  9167. // Try to prove the following set of facts:
  9168. // - The predicate is monotonic in the iteration space.
  9169. // - If the check does not fail on the 1st iteration:
9170. // - No overflow will happen during the first MaxIter iterations;
  9171. // - It will not fail on the MaxIter'th iteration.
  9172. // If the check does fail on the 1st iteration, we leave the loop and no
  9173. // other checks matter.
9174. // If there is a loop-invariant operand, force it into the RHS; otherwise bail out.
  9175. if (!isLoopInvariant(RHS, L)) {
  9176. if (!isLoopInvariant(LHS, L))
  9177. return None;
  9178. std::swap(LHS, RHS);
  9179. Pred = ICmpInst::getSwappedPredicate(Pred);
  9180. }
  9181. auto *AR = dyn_cast<SCEVAddRecExpr>(LHS);
  9182. if (!AR || AR->getLoop() != L)
  9183. return None;
  9184. // The predicate must be relational (i.e. <, <=, >=, >).
  9185. if (!ICmpInst::isRelational(Pred))
  9186. return None;
  9187. // TODO: Support steps other than +/- 1.
  9188. const SCEV *Step = AR->getStepRecurrence(*this);
  9189. auto *One = getOne(Step->getType());
  9190. auto *MinusOne = getNegativeSCEV(One);
  9191. if (Step != One && Step != MinusOne)
  9192. return None;
9193. // A type mismatch here means that MaxIter is potentially larger than the max
9194. // unsigned value in the start type, which means we cannot prove no-wrap for
9195. // the indvar.
  9196. if (AR->getType() != MaxIter->getType())
  9197. return None;
  9198. // Value of IV on suggested last iteration.
  9199. const SCEV *Last = AR->evaluateAtIteration(MaxIter, *this);
  9200. // Does it still meet the requirement?
  9201. if (!isLoopBackedgeGuardedByCond(L, Pred, Last, RHS))
  9202. return None;
  9203. // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does
  9204. // not exceed max unsigned value of this type), this effectively proves
  9205. // that there is no wrap during the iteration. To prove that there is no
  9206. // signed/unsigned wrap, we need to check that
  9207. // Start <= Last for step = 1 or Start >= Last for step = -1.
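// For illustration (hypothetical values): with Step = 1, an unsigned Pred,
// Start = 10 and MaxIter = 20 in i8, Last = 30, and proving 10 u<= 30 shows
// that the IV cannot wrap within the first MaxIter iterations.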
  9208. ICmpInst::Predicate NoOverflowPred =
  9209. CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
  9210. if (Step == MinusOne)
  9211. NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred);
  9212. const SCEV *Start = AR->getStart();
  9213. if (!isKnownPredicateAt(NoOverflowPred, Start, Last, CtxI))
  9214. return None;
  9215. // Everything is fine.
  9216. return ScalarEvolution::LoopInvariantPredicate(Pred, Start, RHS);
  9217. }
  9218. bool ScalarEvolution::isKnownPredicateViaConstantRanges(
  9219. ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
  9220. if (HasSameValue(LHS, RHS))
  9221. return ICmpInst::isTrueWhenEqual(Pred);
  9222. // This code is split out from isKnownPredicate because it is called from
  9223. // within isLoopEntryGuardedByCond.
  9224. auto CheckRanges = [&](const ConstantRange &RangeLHS,
  9225. const ConstantRange &RangeRHS) {
  9226. return RangeLHS.icmp(Pred, RangeRHS);
  9227. };
  9228. // The check at the top of the function catches the case where the values are
  9229. // known to be equal.
  9230. if (Pred == CmpInst::ICMP_EQ)
  9231. return false;
  9232. if (Pred == CmpInst::ICMP_NE) {
  9233. if (CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) ||
  9234. CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)))
  9235. return true;
  9236. auto *Diff = getMinusSCEV(LHS, RHS);
  9237. return !isa<SCEVCouldNotCompute>(Diff) && isKnownNonZero(Diff);
  9238. }
  9239. if (CmpInst::isSigned(Pred))
  9240. return CheckRanges(getSignedRange(LHS), getSignedRange(RHS));
  9241. return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS));
  9242. }
  9243. bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
  9244. const SCEV *LHS,
  9245. const SCEV *RHS) {
  9246. // Match X to (A + C1)<ExpectedFlags> and Y to (A + C2)<ExpectedFlags>, where
9247. // C1 and C2 are constant integers. If either X or Y is not an add expression,
  9248. // consider them as X + 0 and Y + 0 respectively. C1 and C2 are returned via
  9249. // OutC1 and OutC2.
  9250. auto MatchBinaryAddToConst = [this](const SCEV *X, const SCEV *Y,
  9251. APInt &OutC1, APInt &OutC2,
  9252. SCEV::NoWrapFlags ExpectedFlags) {
  9253. const SCEV *XNonConstOp, *XConstOp;
  9254. const SCEV *YNonConstOp, *YConstOp;
  9255. SCEV::NoWrapFlags XFlagsPresent;
  9256. SCEV::NoWrapFlags YFlagsPresent;
  9257. if (!splitBinaryAdd(X, XConstOp, XNonConstOp, XFlagsPresent)) {
  9258. XConstOp = getZero(X->getType());
  9259. XNonConstOp = X;
  9260. XFlagsPresent = ExpectedFlags;
  9261. }
  9262. if (!isa<SCEVConstant>(XConstOp) ||
  9263. (XFlagsPresent & ExpectedFlags) != ExpectedFlags)
  9264. return false;
  9265. if (!splitBinaryAdd(Y, YConstOp, YNonConstOp, YFlagsPresent)) {
  9266. YConstOp = getZero(Y->getType());
  9267. YNonConstOp = Y;
  9268. YFlagsPresent = ExpectedFlags;
  9269. }
  9270. if (!isa<SCEVConstant>(YConstOp) ||
  9271. (YFlagsPresent & ExpectedFlags) != ExpectedFlags)
  9272. return false;
  9273. if (YNonConstOp != XNonConstOp)
  9274. return false;
  9275. OutC1 = cast<SCEVConstant>(XConstOp)->getAPInt();
  9276. OutC2 = cast<SCEVConstant>(YConstOp)->getAPInt();
  9277. return true;
  9278. };
  9279. APInt C1;
  9280. APInt C2;
  9281. switch (Pred) {
  9282. default:
  9283. break;
  9284. case ICmpInst::ICMP_SGE:
  9285. std::swap(LHS, RHS);
  9286. LLVM_FALLTHROUGH;
  9287. case ICmpInst::ICMP_SLE:
  9288. // (X + C1)<nsw> s<= (X + C2)<nsw> if C1 s<= C2.
  9289. if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNSW) && C1.sle(C2))
  9290. return true;
  9291. break;
  9292. case ICmpInst::ICMP_SGT:
  9293. std::swap(LHS, RHS);
  9294. LLVM_FALLTHROUGH;
  9295. case ICmpInst::ICMP_SLT:
  9296. // (X + C1)<nsw> s< (X + C2)<nsw> if C1 s< C2.
  9297. if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNSW) && C1.slt(C2))
  9298. return true;
  9299. break;
  9300. case ICmpInst::ICMP_UGE:
  9301. std::swap(LHS, RHS);
  9302. LLVM_FALLTHROUGH;
  9303. case ICmpInst::ICMP_ULE:
  9304. // (X + C1)<nuw> u<= (X + C2)<nuw> for C1 u<= C2.
  9305. if (MatchBinaryAddToConst(RHS, LHS, C2, C1, SCEV::FlagNUW) && C1.ule(C2))
  9306. return true;
  9307. break;
  9308. case ICmpInst::ICMP_UGT:
  9309. std::swap(LHS, RHS);
  9310. LLVM_FALLTHROUGH;
  9311. case ICmpInst::ICMP_ULT:
  9312. // (X + C1)<nuw> u< (X + C2)<nuw> if C1 u< C2.
  9313. if (MatchBinaryAddToConst(RHS, LHS, C2, C1, SCEV::FlagNUW) && C1.ult(C2))
  9314. return true;
  9315. break;
  9316. }
  9317. return false;
  9318. }
  9319. bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred,
  9320. const SCEV *LHS,
  9321. const SCEV *RHS) {
  9322. if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate)
  9323. return false;
9324. // Allowing an arbitrary number of activations of isKnownPredicateViaSplitting on
  9325. // the stack can result in exponential time complexity.
  9326. SaveAndRestore<bool> Restore(ProvingSplitPredicate, true);
  9327. // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L
  9328. //
  9329. // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use
  9330. // isKnownPredicate. isKnownPredicate is more powerful, but also more
  9331. // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the
  9332. // interesting cases seen in practice. We can consider "upgrading" L >= 0 to
  9333. // use isKnownPredicate later if needed.
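//
// For illustration (made-up i8 values): with I = 250 (that is, -6 when
// interpreted as signed) and L = 100, "I u< L" is false, and indeed
// "I s>= 0" already fails, matching the equivalence above.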
  9334. return isKnownNonNegative(RHS) &&
  9335. isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) &&
  9336. isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS);
  9337. }
  9338. bool ScalarEvolution::isImpliedViaGuard(const BasicBlock *BB,
  9339. ICmpInst::Predicate Pred,
  9340. const SCEV *LHS, const SCEV *RHS) {
  9341. // No need to even try if we know the module has no guards.
  9342. if (!HasGuards)
  9343. return false;
  9344. return any_of(*BB, [&](const Instruction &I) {
  9345. using namespace llvm::PatternMatch;
  9346. Value *Condition;
  9347. return match(&I, m_Intrinsic<Intrinsic::experimental_guard>(
  9348. m_Value(Condition))) &&
  9349. isImpliedCond(Pred, LHS, RHS, Condition, false);
  9350. });
  9351. }
  9352. /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
  9353. /// protected by a conditional between LHS and RHS. This is used to
9354. /// eliminate casts.
  9355. bool
  9356. ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
  9357. ICmpInst::Predicate Pred,
  9358. const SCEV *LHS, const SCEV *RHS) {
  9359. // Interpret a null as meaning no loop, where there is obviously no guard
  9360. // (interprocedural conditions notwithstanding).
  9361. if (!L) return true;
  9362. if (VerifyIR)
  9363. assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
  9364. "This cannot be done on broken IR!");
  9365. if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
  9366. return true;
  9367. BasicBlock *Latch = L->getLoopLatch();
  9368. if (!Latch)
  9369. return false;
  9370. BranchInst *LoopContinuePredicate =
  9371. dyn_cast<BranchInst>(Latch->getTerminator());
  9372. if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
  9373. isImpliedCond(Pred, LHS, RHS,
  9374. LoopContinuePredicate->getCondition(),
  9375. LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
  9376. return true;
  9377. // We don't want more than one activation of the following loops on the stack
  9378. // -- that can lead to O(n!) time complexity.
  9379. if (WalkingBEDominatingConds)
  9380. return false;
  9381. SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);
  9382. // See if we can exploit a trip count to prove the predicate.
  9383. const auto &BETakenInfo = getBackedgeTakenInfo(L);
  9384. const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this);
  9385. if (LatchBECount != getCouldNotCompute()) {
  9386. // We know that Latch branches back to the loop header exactly
9387. // LatchBECount times. This means the backedge condition at Latch is
  9388. // equivalent to "{0,+,1} u< LatchBECount".
  9389. Type *Ty = LatchBECount->getType();
  9390. auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW);
  9391. const SCEV *LoopCounter =
  9392. getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags);
  9393. if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter,
  9394. LatchBECount))
  9395. return true;
  9396. }
  9397. // Check conditions due to any @llvm.assume intrinsics.
  9398. for (auto &AssumeVH : AC.assumptions()) {
  9399. if (!AssumeVH)
  9400. continue;
  9401. auto *CI = cast<CallInst>(AssumeVH);
  9402. if (!DT.dominates(CI, Latch->getTerminator()))
  9403. continue;
  9404. if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
  9405. return true;
  9406. }
  9407. // If the loop is not reachable from the entry block, we risk running into an
  9408. // infinite loop as we walk up into the dom tree. These loops do not matter
  9409. // anyway, so we just return a conservative answer when we see them.
  9410. if (!DT.isReachableFromEntry(L->getHeader()))
  9411. return false;
  9412. if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
  9413. return true;
  9414. for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
  9415. DTN != HeaderDTN; DTN = DTN->getIDom()) {
  9416. assert(DTN && "should reach the loop header before reaching the root!");
  9417. BasicBlock *BB = DTN->getBlock();
  9418. if (isImpliedViaGuard(BB, Pred, LHS, RHS))
  9419. return true;
  9420. BasicBlock *PBB = BB->getSinglePredecessor();
  9421. if (!PBB)
  9422. continue;
  9423. BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
  9424. if (!ContinuePredicate || !ContinuePredicate->isConditional())
  9425. continue;
  9426. Value *Condition = ContinuePredicate->getCondition();
  9427. // If we have an edge `E` within the loop body that dominates the only
  9428. // latch, the condition guarding `E` also guards the backedge. This
  9429. // reasoning works only for loops with a single latch.
  9430. BasicBlockEdge DominatingEdge(PBB, BB);
  9431. if (DominatingEdge.isSingleEdge()) {
  9432. // We're constructively (and conservatively) enumerating edges within the
  9433. // loop body that dominate the latch. The dominator tree better agree
  9434. // with us on this:
  9435. assert(DT.dominates(DominatingEdge, Latch) && "should be!");
  9436. if (isImpliedCond(Pred, LHS, RHS, Condition,
  9437. BB != ContinuePredicate->getSuccessor(0)))
  9438. return true;
  9439. }
  9440. }
  9441. return false;
  9442. }
  9443. bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
  9444. ICmpInst::Predicate Pred,
  9445. const SCEV *LHS,
  9446. const SCEV *RHS) {
  9447. if (VerifyIR)
  9448. assert(!verifyFunction(*BB->getParent(), &dbgs()) &&
  9449. "This cannot be done on broken IR!");
  9450. // If we cannot prove strict comparison (e.g. a > b), maybe we can prove
  9451. // the facts (a >= b && a != b) separately. A typical situation is when the
  9452. // non-strict comparison is known from ranges and non-equality is known from
  9453. // dominating predicates. If we are proving strict comparison, we always try
  9454. // to prove non-equality and non-strict comparison separately.
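// For illustration (hypothetical facts): if constant ranges give "a u>= b"
// and a dominating branch gives "a != b", the two facts together prove
// "a u> b" even though neither one alone does.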
  9455. auto NonStrictPredicate = ICmpInst::getNonStrictPredicate(Pred);
  9456. const bool ProvingStrictComparison = (Pred != NonStrictPredicate);
  9457. bool ProvedNonStrictComparison = false;
  9458. bool ProvedNonEquality = false;
  9459. auto SplitAndProve =
  9460. [&](std::function<bool(ICmpInst::Predicate)> Fn) -> bool {
  9461. if (!ProvedNonStrictComparison)
  9462. ProvedNonStrictComparison = Fn(NonStrictPredicate);
  9463. if (!ProvedNonEquality)
  9464. ProvedNonEquality = Fn(ICmpInst::ICMP_NE);
  9465. if (ProvedNonStrictComparison && ProvedNonEquality)
  9466. return true;
  9467. return false;
  9468. };
  9469. if (ProvingStrictComparison) {
  9470. auto ProofFn = [&](ICmpInst::Predicate P) {
  9471. return isKnownViaNonRecursiveReasoning(P, LHS, RHS);
  9472. };
  9473. if (SplitAndProve(ProofFn))
  9474. return true;
  9475. }
  9476. // Try to prove (Pred, LHS, RHS) using isImpliedViaGuard.
  9477. auto ProveViaGuard = [&](const BasicBlock *Block) {
  9478. if (isImpliedViaGuard(Block, Pred, LHS, RHS))
  9479. return true;
  9480. if (ProvingStrictComparison) {
  9481. auto ProofFn = [&](ICmpInst::Predicate P) {
  9482. return isImpliedViaGuard(Block, P, LHS, RHS);
  9483. };
  9484. if (SplitAndProve(ProofFn))
  9485. return true;
  9486. }
  9487. return false;
  9488. };
  9489. // Try to prove (Pred, LHS, RHS) using isImpliedCond.
  9490. auto ProveViaCond = [&](const Value *Condition, bool Inverse) {
  9491. const Instruction *CtxI = &BB->front();
  9492. if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, CtxI))
  9493. return true;
  9494. if (ProvingStrictComparison) {
  9495. auto ProofFn = [&](ICmpInst::Predicate P) {
  9496. return isImpliedCond(P, LHS, RHS, Condition, Inverse, CtxI);
  9497. };
  9498. if (SplitAndProve(ProofFn))
  9499. return true;
  9500. }
  9501. return false;
  9502. };
  9503. // Starting at the block's predecessor, climb up the predecessor chain, as long
9504. // as we can find predecessors that have unique successors
  9505. // leading to the original block.
  9506. const Loop *ContainingLoop = LI.getLoopFor(BB);
  9507. const BasicBlock *PredBB;
  9508. if (ContainingLoop && ContainingLoop->getHeader() == BB)
  9509. PredBB = ContainingLoop->getLoopPredecessor();
  9510. else
  9511. PredBB = BB->getSinglePredecessor();
  9512. for (std::pair<const BasicBlock *, const BasicBlock *> Pair(PredBB, BB);
  9513. Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
  9514. if (ProveViaGuard(Pair.first))
  9515. return true;
  9516. const BranchInst *LoopEntryPredicate =
  9517. dyn_cast<BranchInst>(Pair.first->getTerminator());
  9518. if (!LoopEntryPredicate ||
  9519. LoopEntryPredicate->isUnconditional())
  9520. continue;
  9521. if (ProveViaCond(LoopEntryPredicate->getCondition(),
  9522. LoopEntryPredicate->getSuccessor(0) != Pair.second))
  9523. return true;
  9524. }
  9525. // Check conditions due to any @llvm.assume intrinsics.
  9526. for (auto &AssumeVH : AC.assumptions()) {
  9527. if (!AssumeVH)
  9528. continue;
  9529. auto *CI = cast<CallInst>(AssumeVH);
  9530. if (!DT.dominates(CI, BB))
  9531. continue;
  9532. if (ProveViaCond(CI->getArgOperand(0), false))
  9533. return true;
  9534. }
  9535. return false;
  9536. }
  9537. bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
  9538. ICmpInst::Predicate Pred,
  9539. const SCEV *LHS,
  9540. const SCEV *RHS) {
  9541. // Interpret a null as meaning no loop, where there is obviously no guard
  9542. // (interprocedural conditions notwithstanding).
  9543. if (!L)
  9544. return false;
  9545. // Both LHS and RHS must be available at loop entry.
  9546. assert(isAvailableAtLoopEntry(LHS, L) &&
  9547. "LHS is not available at Loop Entry");
  9548. assert(isAvailableAtLoopEntry(RHS, L) &&
  9549. "RHS is not available at Loop Entry");
  9550. if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
  9551. return true;
  9552. return isBasicBlockEntryGuardedByCond(L->getHeader(), Pred, LHS, RHS);
  9553. }
  9554. bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
  9555. const SCEV *RHS,
  9556. const Value *FoundCondValue, bool Inverse,
  9557. const Instruction *CtxI) {
9558. // A false condition implies anything. Do not bother analyzing it further.
  9559. if (FoundCondValue ==
  9560. ConstantInt::getBool(FoundCondValue->getContext(), Inverse))
  9561. return true;
  9562. if (!PendingLoopPredicates.insert(FoundCondValue).second)
  9563. return false;
  9564. auto ClearOnExit =
  9565. make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });
  9566. // Recursively handle And and Or conditions.
  9567. const Value *Op0, *Op1;
  9568. if (match(FoundCondValue, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
  9569. if (!Inverse)
  9570. return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, CtxI) ||
  9571. isImpliedCond(Pred, LHS, RHS, Op1, Inverse, CtxI);
  9572. } else if (match(FoundCondValue, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
  9573. if (Inverse)
  9574. return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, CtxI) ||
  9575. isImpliedCond(Pred, LHS, RHS, Op1, Inverse, CtxI);
  9576. }
  9577. const ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
  9578. if (!ICI) return false;
9579. // We have now found a conditional branch that dominates the loop or controls
  9580. // the loop latch. Check to see if it is the comparison we are looking for.
  9581. ICmpInst::Predicate FoundPred;
  9582. if (Inverse)
  9583. FoundPred = ICI->getInversePredicate();
  9584. else
  9585. FoundPred = ICI->getPredicate();
  9586. const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
  9587. const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
  9588. return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, CtxI);
  9589. }
  9590. bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
  9591. const SCEV *RHS,
  9592. ICmpInst::Predicate FoundPred,
  9593. const SCEV *FoundLHS, const SCEV *FoundRHS,
  9594. const Instruction *CtxI) {
  9595. // Balance the types.
  9596. if (getTypeSizeInBits(LHS->getType()) <
  9597. getTypeSizeInBits(FoundLHS->getType())) {
  9598. // For unsigned and equality predicates, try to prove that both found
9599. // operands fit into a narrow unsigned range. If so, try to prove facts in
  9600. // narrow types.
  9601. if (!CmpInst::isSigned(FoundPred) && !FoundLHS->getType()->isPointerTy() &&
  9602. !FoundRHS->getType()->isPointerTy()) {
  9603. auto *NarrowType = LHS->getType();
  9604. auto *WideType = FoundLHS->getType();
  9605. auto BitWidth = getTypeSizeInBits(NarrowType);
  9606. const SCEV *MaxValue = getZeroExtendExpr(
  9607. getConstant(APInt::getMaxValue(BitWidth)), WideType);
  9608. if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, FoundLHS,
  9609. MaxValue) &&
  9610. isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, FoundRHS,
  9611. MaxValue)) {
  9612. const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType);
  9613. const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType);
  9614. if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS,
  9615. TruncFoundRHS, CtxI))
  9616. return true;
  9617. }
  9618. }
  9619. if (LHS->getType()->isPointerTy() || RHS->getType()->isPointerTy())
  9620. return false;
  9621. if (CmpInst::isSigned(Pred)) {
  9622. LHS = getSignExtendExpr(LHS, FoundLHS->getType());
  9623. RHS = getSignExtendExpr(RHS, FoundLHS->getType());
  9624. } else {
  9625. LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
  9626. RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
  9627. }
  9628. } else if (getTypeSizeInBits(LHS->getType()) >
  9629. getTypeSizeInBits(FoundLHS->getType())) {
  9630. if (FoundLHS->getType()->isPointerTy() || FoundRHS->getType()->isPointerTy())
  9631. return false;
  9632. if (CmpInst::isSigned(FoundPred)) {
  9633. FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
  9634. FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
  9635. } else {
  9636. FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
  9637. FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
  9638. }
  9639. }
  9640. return isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, FoundLHS,
  9641. FoundRHS, CtxI);
  9642. }
  9643. bool ScalarEvolution::isImpliedCondBalancedTypes(
  9644. ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
  9645. ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS,
  9646. const Instruction *CtxI) {
  9647. assert(getTypeSizeInBits(LHS->getType()) ==
  9648. getTypeSizeInBits(FoundLHS->getType()) &&
  9649. "Types should be balanced!");
  9650. // Canonicalize the query to match the way instcombine will have
  9651. // canonicalized the comparison.
  9652. if (SimplifyICmpOperands(Pred, LHS, RHS))
  9653. if (LHS == RHS)
  9654. return CmpInst::isTrueWhenEqual(Pred);
  9655. if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
  9656. if (FoundLHS == FoundRHS)
  9657. return CmpInst::isFalseWhenEqual(FoundPred);
  9658. // Check to see if we can make the LHS or RHS match.
  9659. if (LHS == FoundRHS || RHS == FoundLHS) {
  9660. if (isa<SCEVConstant>(RHS)) {
  9661. std::swap(FoundLHS, FoundRHS);
  9662. FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
  9663. } else {
  9664. std::swap(LHS, RHS);
  9665. Pred = ICmpInst::getSwappedPredicate(Pred);
  9666. }
  9667. }
  9668. // Check whether the found predicate is the same as the desired predicate.
  9669. if (FoundPred == Pred)
  9670. return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI);
  9671. // Check whether swapping the found predicate makes it the same as the
  9672. // desired predicate.
  9673. if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
  9674. // We can write the implication
  9675. // 0. LHS Pred RHS <- FoundLHS SwapPred FoundRHS
  9676. // using one of the following ways:
  9677. // 1. LHS Pred RHS <- FoundRHS Pred FoundLHS
  9678. // 2. RHS SwapPred LHS <- FoundLHS SwapPred FoundRHS
  9679. // 3. LHS Pred RHS <- ~FoundLHS Pred ~FoundRHS
  9680. // 4. ~LHS SwapPred ~RHS <- FoundLHS SwapPred FoundRHS
  9681. // Forms 1. and 2. require swapping the operands of one condition. Don't
  9682. // do this if it would break canonical constant/addrec ordering.
  9683. if (!isa<SCEVConstant>(RHS) && !isa<SCEVAddRecExpr>(LHS))
  9684. return isImpliedCondOperands(FoundPred, RHS, LHS, FoundLHS, FoundRHS,
  9685. CtxI);
  9686. if (!isa<SCEVConstant>(FoundRHS) && !isa<SCEVAddRecExpr>(FoundLHS))
  9687. return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, CtxI);
  9688. // There's no clear preference between forms 3. and 4., try both. Avoid
  9689. // forming getNotSCEV of pointer values as the resulting subtract is
  9690. // not legal.
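  // (Illustrative note: forms 3. and 4. rely on x -> NOT x = -1 - x reversing
  // both the signed and the unsigned order; e.g. 3 u< 5 exactly when
  // NOT 5 u< NOT 3, i.e. -6 u< -4.)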
  9691. if (!LHS->getType()->isPointerTy() && !RHS->getType()->isPointerTy() &&
  9692. isImpliedCondOperands(FoundPred, getNotSCEV(LHS), getNotSCEV(RHS),
  9693. FoundLHS, FoundRHS, CtxI))
  9694. return true;
  9695. if (!FoundLHS->getType()->isPointerTy() &&
  9696. !FoundRHS->getType()->isPointerTy() &&
  9697. isImpliedCondOperands(Pred, LHS, RHS, getNotSCEV(FoundLHS),
  9698. getNotSCEV(FoundRHS), CtxI))
  9699. return true;
  9700. return false;
  9701. }
  9702. auto IsSignFlippedPredicate = [](CmpInst::Predicate P1,
  9703. CmpInst::Predicate P2) {
  9704. assert(P1 != P2 && "Handled earlier!");
  9705. return CmpInst::isRelational(P2) &&
  9706. P1 == CmpInst::getFlippedSignednessPredicate(P2);
  9707. };
  9708. if (IsSignFlippedPredicate(Pred, FoundPred)) {
  9709. // Unsigned comparison is the same as signed comparison when both the
  9710. // operands are non-negative or negative.
  9711. if ((isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) ||
  9712. (isKnownNegative(FoundLHS) && isKnownNegative(FoundRHS)))
  9713. return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI);
  9714. // Create local copies that we can freely swap and canonicalize our
  9715. // conditions to "le/lt".
  9716. ICmpInst::Predicate CanonicalPred = Pred, CanonicalFoundPred = FoundPred;
  9717. const SCEV *CanonicalLHS = LHS, *CanonicalRHS = RHS,
  9718. *CanonicalFoundLHS = FoundLHS, *CanonicalFoundRHS = FoundRHS;
  9719. if (ICmpInst::isGT(CanonicalPred) || ICmpInst::isGE(CanonicalPred)) {
  9720. CanonicalPred = ICmpInst::getSwappedPredicate(CanonicalPred);
  9721. CanonicalFoundPred = ICmpInst::getSwappedPredicate(CanonicalFoundPred);
  9722. std::swap(CanonicalLHS, CanonicalRHS);
  9723. std::swap(CanonicalFoundLHS, CanonicalFoundRHS);
  9724. }
  9725. assert((ICmpInst::isLT(CanonicalPred) || ICmpInst::isLE(CanonicalPred)) &&
  9726. "Must be!");
  9727. assert((ICmpInst::isLT(CanonicalFoundPred) ||
  9728. ICmpInst::isLE(CanonicalFoundPred)) &&
  9729. "Must be!");
  9730. if (ICmpInst::isSigned(CanonicalPred) && isKnownNonNegative(CanonicalRHS))
  9731. // Use implication:
  9732. // x <u y && y >=s 0 --> x <s y.
  9733. // If we can prove the left part, the right part is also proven.
  9734. return isImpliedCondOperands(CanonicalFoundPred, CanonicalLHS,
  9735. CanonicalRHS, CanonicalFoundLHS,
  9736. CanonicalFoundRHS);
  9737. if (ICmpInst::isUnsigned(CanonicalPred) && isKnownNegative(CanonicalRHS))
  9738. // Use implication:
  9739. // x <s y && y <s 0 --> x <u y.
  9740. // If we can prove the left part, the right part is also proven.
  9741. return isImpliedCondOperands(CanonicalFoundPred, CanonicalLHS,
  9742. CanonicalRHS, CanonicalFoundLHS,
  9743. CanonicalFoundRHS);
  9744. }
  9745. // Check if we can make progress by sharpening ranges.
  9746. if (FoundPred == ICmpInst::ICMP_NE &&
  9747. (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
  9748. const SCEVConstant *C = nullptr;
  9749. const SCEV *V = nullptr;
  9750. if (isa<SCEVConstant>(FoundLHS)) {
  9751. C = cast<SCEVConstant>(FoundLHS);
  9752. V = FoundRHS;
  9753. } else {
  9754. C = cast<SCEVConstant>(FoundRHS);
  9755. V = FoundLHS;
  9756. }
  9757. // The guarding predicate tells us that C != V. If the known range
  9758. // of V is [C, t), we can sharpen the range to [C + 1, t). The
    // range we consider has to correspond to the same signedness as the
    // predicate we're interested in folding.
  9761. APInt Min = ICmpInst::isSigned(Pred) ?
  9762. getSignedRangeMin(V) : getUnsignedRangeMin(V);
  9763. if (Min == C->getAPInt()) {
  9764. // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
  9765. // This is true even if (Min + 1) wraps around -- in case of
  9766. // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
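      // (E.g. if the unsigned range of V is known to be [5, 100) and the
      // guard says V != 5, then V must in fact lie in [6, 100).)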
  9767. APInt SharperMin = Min + 1;
  9768. switch (Pred) {
  9769. case ICmpInst::ICMP_SGE:
  9770. case ICmpInst::ICMP_UGE:
  9771. // We know V `Pred` SharperMin. If this implies LHS `Pred`
  9772. // RHS, we're done.
  9773. if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin),
  9774. CtxI))
  9775. return true;
  9776. LLVM_FALLTHROUGH;
  9777. case ICmpInst::ICMP_SGT:
  9778. case ICmpInst::ICMP_UGT:
  9779. // We know from the range information that (V `Pred` Min ||
  9780. // V == Min). We know from the guarding condition that !(V
  9781. // == Min). This gives us
  9782. //
  9783. // V `Pred` Min || V == Min && !(V == Min)
  9784. // => V `Pred` Min
  9785. //
  9786. // If V `Pred` Min implies LHS `Pred` RHS, we're done.
  9787. if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), CtxI))
  9788. return true;
  9789. break;
  9790. // `LHS < RHS` and `LHS <= RHS` are handled in the same way as `RHS > LHS` and `RHS >= LHS` respectively.
  9791. case ICmpInst::ICMP_SLE:
  9792. case ICmpInst::ICMP_ULE:
  9793. if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
  9794. LHS, V, getConstant(SharperMin), CtxI))
  9795. return true;
  9796. LLVM_FALLTHROUGH;
  9797. case ICmpInst::ICMP_SLT:
  9798. case ICmpInst::ICMP_ULT:
  9799. if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
  9800. LHS, V, getConstant(Min), CtxI))
  9801. return true;
  9802. break;
  9803. default:
  9804. // No change
  9805. break;
  9806. }
  9807. }
  9808. }
  9809. // Check whether the actual condition is beyond sufficient.
  9810. if (FoundPred == ICmpInst::ICMP_EQ)
  9811. if (ICmpInst::isTrueWhenEqual(Pred))
  9812. if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI))
  9813. return true;
  9814. if (Pred == ICmpInst::ICMP_NE)
  9815. if (!ICmpInst::isTrueWhenEqual(FoundPred))
  9816. if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, CtxI))
  9817. return true;
  9818. // Otherwise assume the worst.
  9819. return false;
  9820. }
  9821. bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
  9822. const SCEV *&L, const SCEV *&R,
  9823. SCEV::NoWrapFlags &Flags) {
  9824. const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
  9825. if (!AE || AE->getNumOperands() != 2)
  9826. return false;
  9827. L = AE->getOperand(0);
  9828. R = AE->getOperand(1);
  9829. Flags = AE->getNoWrapFlags();
  9830. return true;
  9831. }
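// A few illustrative cases for the helper below (assuming %x is a plain
// SCEVUnknown and both addrecs are for the same loop with the same step):
//   computeConstantDifference(%x + 10, %x)              --> 10
//   computeConstantDifference({%x + 10,+,1}, {%x,+,1})  --> 10
//   computeConstantDifference(%x, %y)                   --> None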
  9832. Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
  9833. const SCEV *Less) {
  9834. // We avoid subtracting expressions here because this function is usually
  9835. // fairly deep in the call stack (i.e. is called many times).
  9836. // X - X = 0.
  9837. if (More == Less)
  9838. return APInt(getTypeSizeInBits(More->getType()), 0);
  9839. if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
  9840. const auto *LAR = cast<SCEVAddRecExpr>(Less);
  9841. const auto *MAR = cast<SCEVAddRecExpr>(More);
  9842. if (LAR->getLoop() != MAR->getLoop())
  9843. return None;
  9844. // We look at affine expressions only; not for correctness but to keep
  9845. // getStepRecurrence cheap.
  9846. if (!LAR->isAffine() || !MAR->isAffine())
  9847. return None;
  9848. if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
  9849. return None;
  9850. Less = LAR->getStart();
  9851. More = MAR->getStart();
  9852. // fall through
  9853. }
  9854. if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
  9855. const auto &M = cast<SCEVConstant>(More)->getAPInt();
  9856. const auto &L = cast<SCEVConstant>(Less)->getAPInt();
  9857. return M - L;
  9858. }
  9859. SCEV::NoWrapFlags Flags;
  9860. const SCEV *LLess = nullptr, *RLess = nullptr;
  9861. const SCEV *LMore = nullptr, *RMore = nullptr;
  9862. const SCEVConstant *C1 = nullptr, *C2 = nullptr;
  9863. // Compare (X + C1) vs X.
  9864. if (splitBinaryAdd(Less, LLess, RLess, Flags))
  9865. if ((C1 = dyn_cast<SCEVConstant>(LLess)))
  9866. if (RLess == More)
  9867. return -(C1->getAPInt());
  9868. // Compare X vs (X + C2).
  9869. if (splitBinaryAdd(More, LMore, RMore, Flags))
  9870. if ((C2 = dyn_cast<SCEVConstant>(LMore)))
  9871. if (RMore == Less)
  9872. return C2->getAPInt();
  9873. // Compare (X + C1) vs (X + C2).
  9874. if (C1 && C2 && RLess == RMore)
  9875. return C2->getAPInt() - C1->getAPInt();
  9876. return None;
  9877. }
  9878. bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart(
  9879. ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
  9880. const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *CtxI) {
  9881. // Try to recognize the following pattern:
  9882. //
  9883. // FoundRHS = ...
  9884. // ...
  9885. // loop:
  9886. // FoundLHS = {Start,+,W}
  9887. // context_bb: // Basic block from the same loop
  9888. // known(Pred, FoundLHS, FoundRHS)
  9889. //
  9890. // If some predicate is known in the context of a loop, it is also known on
  9891. // each iteration of this loop, including the first iteration. Therefore, in
  9892. // this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to
  9893. // prove the original pred using this fact.
  9894. if (!CtxI)
  9895. return false;
  9896. const BasicBlock *ContextBB = CtxI->getParent();
  9897. // Make sure AR varies in the context block.
  9898. if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundLHS)) {
  9899. const Loop *L = AR->getLoop();
  9900. // Make sure that context belongs to the loop and executes on 1st iteration
  9901. // (if it ever executes at all).
  9902. if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch()))
  9903. return false;
  9904. if (!isAvailableAtLoopEntry(FoundRHS, AR->getLoop()))
  9905. return false;
  9906. return isImpliedCondOperands(Pred, LHS, RHS, AR->getStart(), FoundRHS);
  9907. }
  9908. if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundRHS)) {
  9909. const Loop *L = AR->getLoop();
  9910. // Make sure that context belongs to the loop and executes on 1st iteration
  9911. // (if it ever executes at all).
  9912. if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch()))
  9913. return false;
  9914. if (!isAvailableAtLoopEntry(FoundLHS, AR->getLoop()))
  9915. return false;
  9916. return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->getStart());
  9917. }
  9918. return false;
  9919. }
  9920. bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
  9921. ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
  9922. const SCEV *FoundLHS, const SCEV *FoundRHS) {
  9923. if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
  9924. return false;
  9925. const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
  9926. if (!AddRecLHS)
  9927. return false;
  9928. const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
  9929. if (!AddRecFoundLHS)
  9930. return false;
  9931. // We'd like to let SCEV reason about control dependencies, so we constrain
  9932. // both the inequalities to be about add recurrences on the same loop. This
  9933. // way we can use isLoopEntryGuardedByCond later.
  9934. const Loop *L = AddRecFoundLHS->getLoop();
  9935. if (L != AddRecLHS->getLoop())
  9936. return false;
  9937. // FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1)
  9938. //
  9939. // FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C)
  9940. // ... (2)
  9941. //
  9942. // Informal proof for (2), assuming (1) [*]:
  9943. //
  9944. // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
  9945. //
  9946. // Then
  9947. //
  9948. // FoundLHS s< FoundRHS s< INT_MIN - C
  9949. // <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ]
  9950. // <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ]
  9951. // <=> (FoundLHS + INT_MIN + C + INT_MIN) s<
  9952. // (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ]
  9953. // <=> FoundLHS + C s< FoundRHS + C
  9954. //
  9955. // [*]: (1) can be proved by ruling out overflow.
  9956. //
  9957. // [**]: This can be proved by analyzing all the four possibilities:
  9958. // (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
  9959. // (A s>= 0, B s>= 0).
  9960. //
  9961. // Note:
  9962. // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
  9963. // will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS
  9964. // = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS
  9965. // s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is
  9966. // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
  9967. // C)".
  9968. Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS);
  9969. Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS);
  9970. if (!LDiff || !RDiff || *LDiff != *RDiff)
  9971. return false;
  9972. if (LDiff->isMinValue())
  9973. return true;
  9974. APInt FoundRHSLimit;
  9975. if (Pred == CmpInst::ICMP_ULT) {
  9976. FoundRHSLimit = -(*RDiff);
  9977. } else {
  9978. assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
  9979. FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff;
  9980. }
  9981. // Try to prove (1) or (2), as needed.
  9982. return isAvailableAtLoopEntry(FoundRHS, L) &&
  9983. isLoopEntryGuardedByCond(L, Pred, FoundRHS,
  9984. getConstant(FoundRHSLimit));
  9985. }
  9986. bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
  9987. const SCEV *LHS, const SCEV *RHS,
  9988. const SCEV *FoundLHS,
  9989. const SCEV *FoundRHS, unsigned Depth) {
  9990. const PHINode *LPhi = nullptr, *RPhi = nullptr;
  9991. auto ClearOnExit = make_scope_exit([&]() {
  9992. if (LPhi) {
  9993. bool Erased = PendingMerges.erase(LPhi);
  9994. assert(Erased && "Failed to erase LPhi!");
  9995. (void)Erased;
  9996. }
  9997. if (RPhi) {
  9998. bool Erased = PendingMerges.erase(RPhi);
  9999. assert(Erased && "Failed to erase RPhi!");
  10000. (void)Erased;
  10001. }
  10002. });
  // Find the respective Phis and check that they are not already pending.
  10004. if (const SCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS))
  10005. if (auto *Phi = dyn_cast<PHINode>(LU->getValue())) {
  10006. if (!PendingMerges.insert(Phi).second)
  10007. return false;
  10008. LPhi = Phi;
  10009. }
  10010. if (const SCEVUnknown *RU = dyn_cast<SCEVUnknown>(RHS))
  10011. if (auto *Phi = dyn_cast<PHINode>(RU->getValue())) {
  10012. // If we detect a loop of Phi nodes being processed by this method, for
  10013. // example:
  10014. //
  10015. // %a = phi i32 [ %some1, %preheader ], [ %b, %latch ]
  10016. // %b = phi i32 [ %some2, %preheader ], [ %a, %latch ]
  10017. //
      // we don't want to deal with a case that complex, so return the
      // conservative answer false.
  10020. if (!PendingMerges.insert(Phi).second)
  10021. return false;
  10022. RPhi = Phi;
  10023. }
  10024. // If none of LHS, RHS is a Phi, nothing to do here.
  10025. if (!LPhi && !RPhi)
  10026. return false;
  10027. // If there is a SCEVUnknown Phi we are interested in, make it left.
  10028. if (!LPhi) {
  10029. std::swap(LHS, RHS);
  10030. std::swap(FoundLHS, FoundRHS);
  10031. std::swap(LPhi, RPhi);
  10032. Pred = ICmpInst::getSwappedPredicate(Pred);
  10033. }
  10034. assert(LPhi && "LPhi should definitely be a SCEVUnknown Phi!");
  10035. const BasicBlock *LBB = LPhi->getParent();
  10036. const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
  10037. auto ProvedEasily = [&](const SCEV *S1, const SCEV *S2) {
  10038. return isKnownViaNonRecursiveReasoning(Pred, S1, S2) ||
  10039. isImpliedCondOperandsViaRanges(Pred, S1, S2, FoundLHS, FoundRHS) ||
  10040. isImpliedViaOperations(Pred, S1, S2, FoundLHS, FoundRHS, Depth);
  10041. };
  10042. if (RPhi && RPhi->getParent() == LBB) {
  10043. // Case one: RHS is also a SCEVUnknown Phi from the same basic block.
  10044. // If we compare two Phis from the same block, and for each entry block
  10045. // the predicate is true for incoming values from this block, then the
  10046. // predicate is also true for the Phis.
  10047. for (const BasicBlock *IncBB : predecessors(LBB)) {
  10048. const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
  10049. const SCEV *R = getSCEV(RPhi->getIncomingValueForBlock(IncBB));
  10050. if (!ProvedEasily(L, R))
  10051. return false;
  10052. }
  10053. } else if (RAR && RAR->getLoop()->getHeader() == LBB) {
    // Case two: RHS is also a Phi from the same basic block, and it is an
    // AddRec. It means that there is a loop which has both AddRec and Unknown
    // PHIs; for it, we can compare the incoming values of the AddRec from
    // above the loop and from the latch with the respective incoming values
    // of LPhi.
  10058. // TODO: Generalize to handle loops with many inputs in a header.
  10059. if (LPhi->getNumIncomingValues() != 2) return false;
  10060. auto *RLoop = RAR->getLoop();
  10061. auto *Predecessor = RLoop->getLoopPredecessor();
  10062. assert(Predecessor && "Loop with AddRec with no predecessor?");
  10063. const SCEV *L1 = getSCEV(LPhi->getIncomingValueForBlock(Predecessor));
  10064. if (!ProvedEasily(L1, RAR->getStart()))
  10065. return false;
  10066. auto *Latch = RLoop->getLoopLatch();
  10067. assert(Latch && "Loop with AddRec with no latch?");
  10068. const SCEV *L2 = getSCEV(LPhi->getIncomingValueForBlock(Latch));
  10069. if (!ProvedEasily(L2, RAR->getPostIncExpr(*this)))
  10070. return false;
  10071. } else {
    // In all other cases, go over the inputs of LHS and compare each of them
    // to RHS; the predicate is true for (LHS, RHS) if it is true for all such
    // pairs.
  10074. // At this point RHS is either a non-Phi, or it is a Phi from some block
  10075. // different from LBB.
  10076. for (const BasicBlock *IncBB : predecessors(LBB)) {
  10077. // Check that RHS is available in this block.
  10078. if (!dominates(RHS, IncBB))
  10079. return false;
  10080. const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
  10081. // Make sure L does not refer to a value from a potentially previous
  10082. // iteration of a loop.
  10083. if (!properlyDominates(L, IncBB))
  10084. return false;
  10085. if (!ProvedEasily(L, RHS))
  10086. return false;
  10087. }
  10088. }
  10089. return true;
  10090. }
  10091. bool ScalarEvolution::isImpliedCondOperandsViaShift(ICmpInst::Predicate Pred,
  10092. const SCEV *LHS,
  10093. const SCEV *RHS,
  10094. const SCEV *FoundLHS,
  10095. const SCEV *FoundRHS) {
  // We want to imply LHS < RHS from LHS < (RHS >> shiftvalue). First, make
  // sure that we are dealing with the same LHS.
  10098. if (RHS == FoundRHS) {
  10099. std::swap(LHS, RHS);
  10100. std::swap(FoundLHS, FoundRHS);
  10101. Pred = ICmpInst::getSwappedPredicate(Pred);
  10102. }
  10103. if (LHS != FoundLHS)
  10104. return false;
  10105. auto *SUFoundRHS = dyn_cast<SCEVUnknown>(FoundRHS);
  10106. if (!SUFoundRHS)
  10107. return false;
  10108. Value *Shiftee, *ShiftValue;
  10109. using namespace PatternMatch;
  10110. if (match(SUFoundRHS->getValue(),
  10111. m_LShr(m_Value(Shiftee), m_Value(ShiftValue)))) {
  10112. auto *ShifteeS = getSCEV(Shiftee);
  10113. // Prove one of the following:
  10114. // LHS <u (shiftee >> shiftvalue) && shiftee <=u RHS ---> LHS <u RHS
  10115. // LHS <=u (shiftee >> shiftvalue) && shiftee <=u RHS ---> LHS <=u RHS
  10116. // LHS <s (shiftee >> shiftvalue) && shiftee <=s RHS && shiftee >=s 0
  10117. // ---> LHS <s RHS
  10118. // LHS <=s (shiftee >> shiftvalue) && shiftee <=s RHS && shiftee >=s 0
  10119. // ---> LHS <=s RHS
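    // All of these rely on the fact that a logical right shift never increases
    // the unsigned value of the shiftee, e.g. (100 >> 3) == 12 <=u 100.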
  10120. if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
  10121. return isKnownPredicate(ICmpInst::ICMP_ULE, ShifteeS, RHS);
  10122. if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
  10123. if (isKnownNonNegative(ShifteeS))
  10124. return isKnownPredicate(ICmpInst::ICMP_SLE, ShifteeS, RHS);
  10125. }
  10126. return false;
  10127. }
  10128. bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
  10129. const SCEV *LHS, const SCEV *RHS,
  10130. const SCEV *FoundLHS,
  10131. const SCEV *FoundRHS,
  10132. const Instruction *CtxI) {
  10133. if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
  10134. return true;
  10135. if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
  10136. return true;
  10137. if (isImpliedCondOperandsViaShift(Pred, LHS, RHS, FoundLHS, FoundRHS))
  10138. return true;
  10139. if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS,
  10140. CtxI))
  10141. return true;
  10142. return isImpliedCondOperandsHelper(Pred, LHS, RHS,
  10143. FoundLHS, FoundRHS);
  10144. }
  10145. /// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values?
  10146. template <typename MinMaxExprType>
  10147. static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr,
  10148. const SCEV *Candidate) {
  10149. const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr);
  10150. if (!MinMaxExpr)
  10151. return false;
  10152. return is_contained(MinMaxExpr->operands(), Candidate);
  10153. }
  10154. static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
  10155. ICmpInst::Predicate Pred,
  10156. const SCEV *LHS, const SCEV *RHS) {
  10157. // If both sides are affine addrecs for the same loop, with equal
  10158. // steps, and we know the recurrences don't wrap, then we only
  10159. // need to check the predicate on the starting values.
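  // For instance (sketch): {4,+,2}<nuw> u< {10,+,2}<nuw> for the same loop
  // follows from 4 u< 10, since both sequences advance in lockstep without
  // wrapping.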
  10160. if (!ICmpInst::isRelational(Pred))
  10161. return false;
  10162. const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
  10163. if (!LAR)
  10164. return false;
  10165. const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
  10166. if (!RAR)
  10167. return false;
  10168. if (LAR->getLoop() != RAR->getLoop())
  10169. return false;
  10170. if (!LAR->isAffine() || !RAR->isAffine())
  10171. return false;
  10172. if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
  10173. return false;
  10174. SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
  10175. SCEV::FlagNSW : SCEV::FlagNUW;
  10176. if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
  10177. return false;
  10178. return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
  10179. }
  10180. /// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max
  10181. /// expression?
  10182. static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
  10183. ICmpInst::Predicate Pred,
  10184. const SCEV *LHS, const SCEV *RHS) {
  10185. switch (Pred) {
  10186. default:
  10187. return false;
  10188. case ICmpInst::ICMP_SGE:
  10189. std::swap(LHS, RHS);
  10190. LLVM_FALLTHROUGH;
  10191. case ICmpInst::ICMP_SLE:
  10192. return
  10193. // min(A, ...) <= A
  10194. IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) ||
  10195. // A <= max(A, ...)
  10196. IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
  10197. case ICmpInst::ICMP_UGE:
  10198. std::swap(LHS, RHS);
  10199. LLVM_FALLTHROUGH;
  10200. case ICmpInst::ICMP_ULE:
  10201. return
  10202. // min(A, ...) <= A
  10203. // FIXME: what about umin_seq?
  10204. IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) ||
  10205. // A <= max(A, ...)
  10206. IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
  10207. }
  10208. llvm_unreachable("covered switch fell through?!");
  10209. }
  10210. bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
  10211. const SCEV *LHS, const SCEV *RHS,
  10212. const SCEV *FoundLHS,
  10213. const SCEV *FoundRHS,
  10214. unsigned Depth) {
  10215. assert(getTypeSizeInBits(LHS->getType()) ==
  10216. getTypeSizeInBits(RHS->getType()) &&
  10217. "LHS and RHS have different sizes?");
  10218. assert(getTypeSizeInBits(FoundLHS->getType()) ==
  10219. getTypeSizeInBits(FoundRHS->getType()) &&
  10220. "FoundLHS and FoundRHS have different sizes?");
  10221. // We want to avoid hurting the compile time with analysis of too big trees.
  10222. if (Depth > MaxSCEVOperationsImplicationDepth)
  10223. return false;
  10224. // We only want to work with GT comparison so far.
  10225. if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) {
  10226. Pred = CmpInst::getSwappedPredicate(Pred);
  10227. std::swap(LHS, RHS);
  10228. std::swap(FoundLHS, FoundRHS);
  10229. }
  10230. // For unsigned, try to reduce it to corresponding signed comparison.
  10231. if (Pred == ICmpInst::ICMP_UGT)
  10232. // We can replace unsigned predicate with its signed counterpart if all
  10233. // involved values are non-negative.
  10234. // TODO: We could have better support for unsigned.
  10235. if (isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) {
  10236. // Knowing that both FoundLHS and FoundRHS are non-negative, and knowing
  10237. // FoundLHS >u FoundRHS, we also know that FoundLHS >s FoundRHS. Let us
  10238. // use this fact to prove that LHS and RHS are non-negative.
  10239. const SCEV *MinusOne = getMinusOne(LHS->getType());
  10240. if (isImpliedCondOperands(ICmpInst::ICMP_SGT, LHS, MinusOne, FoundLHS,
  10241. FoundRHS) &&
  10242. isImpliedCondOperands(ICmpInst::ICMP_SGT, RHS, MinusOne, FoundLHS,
  10243. FoundRHS))
  10244. Pred = ICmpInst::ICMP_SGT;
  10245. }
  10246. if (Pred != ICmpInst::ICMP_SGT)
  10247. return false;
  10248. auto GetOpFromSExt = [&](const SCEV *S) {
  10249. if (auto *Ext = dyn_cast<SCEVSignExtendExpr>(S))
  10250. return Ext->getOperand();
  10251. // TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off
  10252. // the constant in some cases.
  10253. return S;
  10254. };
  10255. // Acquire values from extensions.
  10256. auto *OrigLHS = LHS;
  10257. auto *OrigFoundLHS = FoundLHS;
  10258. LHS = GetOpFromSExt(LHS);
  10259. FoundLHS = GetOpFromSExt(FoundLHS);
  // Whether the SGT predicate can be proved trivially or using the found
  // context.
  10261. auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) {
  10262. return isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGT, S1, S2) ||
  10263. isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS,
  10264. FoundRHS, Depth + 1);
  10265. };
  10266. if (auto *LHSAddExpr = dyn_cast<SCEVAddExpr>(LHS)) {
  10267. // We want to avoid creation of any new non-constant SCEV. Since we are
  10268. // going to compare the operands to RHS, we should be certain that we don't
  10269. // need any size extensions for this. So let's decline all cases when the
  10270. // sizes of types of LHS and RHS do not match.
  10271. // TODO: Maybe try to get RHS from sext to catch more cases?
  10272. if (getTypeSizeInBits(LHS->getType()) != getTypeSizeInBits(RHS->getType()))
  10273. return false;
  10274. // Should not overflow.
  10275. if (!LHSAddExpr->hasNoSignedWrap())
  10276. return false;
  10277. auto *LL = LHSAddExpr->getOperand(0);
  10278. auto *LR = LHSAddExpr->getOperand(1);
  10279. auto *MinusOne = getMinusOne(RHS->getType());
  10280. // Checks that S1 >= 0 && S2 > RHS, trivially or using the found context.
  10281. auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) {
  10282. return IsSGTViaContext(S1, MinusOne) && IsSGTViaContext(S2, RHS);
  10283. };
  10284. // Try to prove the following rule:
  10285. // (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS).
  10286. // (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS).
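    // (E.g., hypothetically, if LHS == X + 5 with <nsw>, X s>= 0 and
    // 5 s> RHS, then LHS s>= 5 s> RHS.)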
  10287. if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL))
  10288. return true;
  10289. } else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) {
  10290. Value *LL, *LR;
  10291. // FIXME: Once we have SDiv implemented, we can get rid of this matching.
  10292. using namespace llvm::PatternMatch;
  10293. if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) {
  10294. // Rules for division.
  10295. // We are going to perform some comparisons with Denominator and its
  10296. // derivative expressions. In general case, creating a SCEV for it may
  10297. // lead to a complex analysis of the entire graph, and in particular it
  10298. // can request trip count recalculation for the same loop. This would
  10299. // cache as SCEVCouldNotCompute to avoid the infinite recursion. To avoid
  10300. // this, we only want to create SCEVs that are constants in this section.
  10301. // So we bail if Denominator is not a constant.
  10302. if (!isa<ConstantInt>(LR))
  10303. return false;
  10304. auto *Denominator = cast<SCEVConstant>(getSCEV(LR));
  10305. // We want to make sure that LHS = FoundLHS / Denominator. If it is so,
  10306. // then a SCEV for the numerator already exists and matches with FoundLHS.
  10307. auto *Numerator = getExistingSCEV(LL);
  10308. if (!Numerator || Numerator->getType() != FoundLHS->getType())
  10309. return false;
  10310. // Make sure that the numerator matches with FoundLHS and the denominator
  10311. // is positive.
  10312. if (!HasSameValue(Numerator, FoundLHS) || !isKnownPositive(Denominator))
  10313. return false;
  10314. auto *DTy = Denominator->getType();
  10315. auto *FRHSTy = FoundRHS->getType();
  10316. if (DTy->isPointerTy() != FRHSTy->isPointerTy())
  10317. // One of types is a pointer and another one is not. We cannot extend
  10318. // them properly to a wider type, so let us just reject this case.
  10319. // TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help
  10320. // to avoid this check.
  10321. return false;
  10322. // Given that:
  10323. // FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0.
  10324. auto *WTy = getWiderType(DTy, FRHSTy);
  10325. auto *DenominatorExt = getNoopOrSignExtend(Denominator, WTy);
  10326. auto *FoundRHSExt = getNoopOrSignExtend(FoundRHS, WTy);
  10327. // Try to prove the following rule:
  10328. // (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS).
      // For example, given that FoundLHS > 2, FoundLHS is at least 3. If we
      // divide it by a Denominator < 4, we will have at least 1.
  10331. auto *DenomMinusTwo = getMinusSCEV(DenominatorExt, getConstant(WTy, 2));
  10332. if (isKnownNonPositive(RHS) &&
  10333. IsSGTViaContext(FoundRHSExt, DenomMinusTwo))
  10334. return true;
  10335. // Try to prove the following rule:
  10336. // (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS).
  10337. // For example, given that FoundLHS > -3. Then FoundLHS is at least -2.
  10338. // If we divide it by Denominator > 2, then:
  10339. // 1. If FoundLHS is negative, then the result is 0.
  10340. // 2. If FoundLHS is non-negative, then the result is non-negative.
      // Either way, the result is non-negative.
  10342. auto *MinusOne = getMinusOne(WTy);
  10343. auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt);
  10344. if (isKnownNegative(RHS) &&
  10345. IsSGTViaContext(FoundRHSExt, NegDenomMinusOne))
  10346. return true;
  10347. }
  10348. }
  10349. // If our expression contained SCEVUnknown Phis, and we split it down and now
  10350. // need to prove something for them, try to prove the predicate for every
  10351. // possible incoming values of those Phis.
  10352. if (isImpliedViaMerge(Pred, OrigLHS, RHS, OrigFoundLHS, FoundRHS, Depth + 1))
  10353. return true;
  10354. return false;
  10355. }
  10356. static bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred,
  10357. const SCEV *LHS, const SCEV *RHS) {
  10358. // zext x u<= sext x, sext x s<= zext x
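  // E.g. for an i8 value x = -1 widened to i16: zext x == 255 and
  // sext x == 65535 (viewed unsigned), so 255 u<= 65535 and -1 s<= 255.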
  10359. switch (Pred) {
  10360. case ICmpInst::ICMP_SGE:
  10361. std::swap(LHS, RHS);
  10362. LLVM_FALLTHROUGH;
  10363. case ICmpInst::ICMP_SLE: {
  10364. // If operand >=s 0 then ZExt == SExt. If operand <s 0 then SExt <s ZExt.
  10365. const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(LHS);
  10366. const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(RHS);
  10367. if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand())
  10368. return true;
  10369. break;
  10370. }
  10371. case ICmpInst::ICMP_UGE:
  10372. std::swap(LHS, RHS);
  10373. LLVM_FALLTHROUGH;
  10374. case ICmpInst::ICMP_ULE: {
  10375. // If operand >=s 0 then ZExt == SExt. If operand <s 0 then ZExt <u SExt.
  10376. const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS);
  10377. const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(RHS);
  10378. if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand())
  10379. return true;
  10380. break;
  10381. }
  10382. default:
  10383. break;
  10384. };
  10385. return false;
  10386. }
  10387. bool
  10388. ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred,
  10389. const SCEV *LHS, const SCEV *RHS) {
  10390. return isKnownPredicateExtendIdiom(Pred, LHS, RHS) ||
  10391. isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
  10392. IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
  10393. IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
  10394. isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
  10395. }
  10396. bool
  10397. ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
  10398. const SCEV *LHS, const SCEV *RHS,
  10399. const SCEV *FoundLHS,
  10400. const SCEV *FoundRHS) {
  10401. switch (Pred) {
  10402. default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
  10403. case ICmpInst::ICMP_EQ:
  10404. case ICmpInst::ICMP_NE:
  10405. if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
  10406. return true;
  10407. break;
  10408. case ICmpInst::ICMP_SLT:
  10409. case ICmpInst::ICMP_SLE:
  10410. if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
  10411. isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS))
  10412. return true;
  10413. break;
  10414. case ICmpInst::ICMP_SGT:
  10415. case ICmpInst::ICMP_SGE:
  10416. if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
  10417. isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS))
  10418. return true;
  10419. break;
  10420. case ICmpInst::ICMP_ULT:
  10421. case ICmpInst::ICMP_ULE:
  10422. if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
  10423. isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS))
  10424. return true;
  10425. break;
  10426. case ICmpInst::ICMP_UGT:
  10427. case ICmpInst::ICMP_UGE:
  10428. if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
  10429. isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS))
  10430. return true;
  10431. break;
  10432. }
  10433. // Maybe it can be proved via operations?
  10434. if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS))
  10435. return true;
  10436. return false;
  10437. }
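// Sketch of the range reasoning below, with hypothetical constants: from the
// antecedent FoundLHS u< 8 and a known constant difference LHS == FoundLHS + 2
// we get LHS in [2, 10), which is enough to conclude e.g. LHS u< 10.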
  10438. bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
  10439. const SCEV *LHS,
  10440. const SCEV *RHS,
  10441. const SCEV *FoundLHS,
  10442. const SCEV *FoundRHS) {
  10443. if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
    // The restriction on `FoundRHS` could be lifted easily -- it exists only
    // to reduce the compile time impact of this optimization.
  10446. return false;
  10447. Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS);
  10448. if (!Addend)
  10449. return false;
  10450. const APInt &ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
  10451. // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
  10452. // antecedent "`FoundLHS` `Pred` `FoundRHS`".
  10453. ConstantRange FoundLHSRange =
  10454. ConstantRange::makeExactICmpRegion(Pred, ConstFoundRHS);
  10455. // Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`:
  10456. ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend));
  10457. // We can also compute the range of values for `LHS` that satisfy the
  10458. // consequent, "`LHS` `Pred` `RHS`":
  10459. const APInt &ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
  10460. // The antecedent implies the consequent if every value of `LHS` that
  10461. // satisfies the antecedent also satisfies the consequent.
  10462. return LHSRange.icmp(Pred, ConstRHS);
  10463. }
  10464. bool ScalarEvolution::canIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
  10465. bool IsSigned) {
  10466. assert(isKnownPositive(Stride) && "Positive stride expected!");
  10467. unsigned BitWidth = getTypeSizeInBits(RHS->getType());
  10468. const SCEV *One = getOne(Stride->getType());
  10469. if (IsSigned) {
  10470. APInt MaxRHS = getSignedRangeMax(RHS);
  10471. APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
  10472. APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));
  10473. // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
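    // (E.g. in i8 with signed RHS max 120 and stride max 10: 120 + 9 > 127,
    // so the IV could overflow before reaching RHS.)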
  10474. return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS);
  10475. }
  10476. APInt MaxRHS = getUnsignedRangeMax(RHS);
  10477. APInt MaxValue = APInt::getMaxValue(BitWidth);
  10478. APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));
  10479. // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
  10480. return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS);
  10481. }
  10482. bool ScalarEvolution::canIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
  10483. bool IsSigned) {
  10484. unsigned BitWidth = getTypeSizeInBits(RHS->getType());
  10485. const SCEV *One = getOne(Stride->getType());
  10486. if (IsSigned) {
  10487. APInt MinRHS = getSignedRangeMin(RHS);
  10488. APInt MinValue = APInt::getSignedMinValue(BitWidth);
  10489. APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));
  10490. // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
  10491. return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS);
  10492. }
  10493. APInt MinRHS = getUnsignedRangeMin(RHS);
  10494. APInt MinValue = APInt::getMinValue(BitWidth);
  10495. APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));
  10496. // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
  10497. return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS);
  10498. }
  10499. const SCEV *ScalarEvolution::getUDivCeilSCEV(const SCEV *N, const SCEV *D) {
  10500. // umin(N, 1) + floor((N - umin(N, 1)) / D)
  10501. // This is equivalent to "1 + floor((N - 1) / D)" for N != 0. The umin
  10502. // expression fixes the case of N=0.
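  // E.g. N=7, D=3: umin(7,1)=1, and 1 + floor((7-1)/3) = 3 = ceil(7/3);
  // for N=0: umin(0,1)=0, and 0 + floor(0/D) = 0.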
  10503. const SCEV *MinNOne = getUMinExpr(N, getOne(N->getType()));
  10504. const SCEV *NMinusOne = getMinusSCEV(N, MinNOne);
  10505. return getAddExpr(MinNOne, getUDivExpr(NMinusOne, D));
  10506. }
  10507. const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
  10508. const SCEV *Stride,
  10509. const SCEV *End,
  10510. unsigned BitWidth,
  10511. bool IsSigned) {
  10512. // The logic in this function assumes we can represent a positive stride.
  10513. // If we can't, the backedge-taken count must be zero.
  10514. if (IsSigned && BitWidth == 1)
  10515. return getZero(Stride->getType());
  // This code has only been closely audited for negative strides in the
  // unsigned comparison case; it may be correct for signed comparison, but
  // that needs to be established.
  10519. assert((!IsSigned || !isKnownNonPositive(Stride)) &&
  10520. "Stride is expected strictly positive for signed case!");
  10521. // Calculate the maximum backedge count based on the range of values
  10522. // permitted by Start, End, and Stride.
  10523. APInt MinStart =
  10524. IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start);
  10525. APInt MinStride =
  10526. IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride);
  10527. // We assume either the stride is positive, or the backedge-taken count
  10528. // is zero. So force StrideForMaxBECount to be at least one.
  10529. APInt One(BitWidth, 1);
  10530. APInt StrideForMaxBECount = IsSigned ? APIntOps::smax(One, MinStride)
  10531. : APIntOps::umax(One, MinStride);
  10532. APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth)
  10533. : APInt::getMaxValue(BitWidth);
  10534. APInt Limit = MaxValue - (StrideForMaxBECount - 1);
  10535. // Although End can be a MAX expression we estimate MaxEnd considering only
  10536. // the case End = RHS of the loop termination condition. This is safe because
  10537. // in the other case (End - Start) is zero, leading to a zero maximum backedge
  10538. // taken count.
  10539. APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit)
  10540. : APIntOps::umin(getUnsignedRangeMax(End), Limit);
  10541. // MaxBECount = ceil((max(MaxEnd, MinStart) - MinStart) / Stride)
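  // (E.g., hypothetically, MinStart=0, MaxEnd=10, StrideForMaxBECount=3 gives
  // ceil(10/3) = 4 as the maximum backedge-taken count.)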
  10542. MaxEnd = IsSigned ? APIntOps::smax(MaxEnd, MinStart)
  10543. : APIntOps::umax(MaxEnd, MinStart);
  10544. return getUDivCeilSCEV(getConstant(MaxEnd - MinStart) /* Delta */,
  10545. getConstant(StrideForMaxBECount) /* Step */);
  10546. }
  10547. ScalarEvolution::ExitLimit
  10548. ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
  10549. const Loop *L, bool IsSigned,
  10550. bool ControlsExit, bool AllowPredicates) {
  10551. SmallPtrSet<const SCEVPredicate *, 4> Predicates;
  10552. const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
  10553. bool PredicatedIV = false;
  10554. auto canAssumeNoSelfWrap = [&](const SCEVAddRecExpr *AR) {
  10555. // Can we prove this loop *must* be UB if overflow of IV occurs?
  10556. // Reasoning goes as follows:
  10557. // * Suppose the IV did self wrap.
  10558. // * If Stride evenly divides the iteration space, then once wrap
  10559. // occurs, the loop must revisit the same values.
  10560. // * We know that RHS is invariant, and that none of those values
  10561. // caused this exit to be taken previously. Thus, this exit is
  10562. // dynamically dead.
  10563. // * If this is the sole exit, then a dead exit implies the loop
  10564. // must be infinite if there are no abnormal exits.
  10565. // * If the loop were infinite, then it must either not be mustprogress
  10566. // or have side effects. Otherwise, it must be UB.
  10567. // * It can't (by assumption), be UB so we have contradicted our
  10568. // premise and can conclude the IV did not in fact self-wrap.
  10569. if (!isLoopInvariant(RHS, L))
  10570. return false;
  10571. auto *StrideC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this));
  10572. if (!StrideC || !StrideC->getAPInt().isPowerOf2())
  10573. return false;
  10574. if (!ControlsExit || !loopHasNoAbnormalExits(L))
  10575. return false;
  10576. return loopIsFiniteByAssumption(L);
  10577. };
  10578. if (!IV) {
  10579. if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS)) {
  10580. const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ZExt->getOperand());
  10581. if (AR && AR->getLoop() == L && AR->isAffine()) {
  10582. auto canProveNUW = [&]() {
  10583. if (!isLoopInvariant(RHS, L))
  10584. return false;
  10585. if (!isKnownNonZero(AR->getStepRecurrence(*this)))
  10586. // We need the sequence defined by AR to strictly increase in the
  10587. // unsigned integer domain for the logic below to hold.
  10588. return false;
  10589. const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType());
  10590. const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType());
  10591. // If RHS <=u Limit, then there must exist a value V in the sequence
  10592. // defined by AR (e.g. {Start,+,Step}) such that V >u RHS, and
  10593. // V <=u UINT_MAX. Thus, we must exit the loop before unsigned
  10594. // overflow occurs. This limit also implies that a signed comparison
  10595. // (in the wide bitwidth) is equivalent to an unsigned comparison as
  10596. // the high bits on both sides must be zero.
  10597. APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this));
  10598. APInt Limit = APInt::getMaxValue(InnerBitWidth) - (StrideMax - 1);
  10599. Limit = Limit.zext(OuterBitWidth);
  10600. return getUnsignedRangeMax(applyLoopGuards(RHS, L)).ule(Limit);
  10601. };
  10602. auto Flags = AR->getNoWrapFlags();
  10603. if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW())
  10604. Flags = setFlags(Flags, SCEV::FlagNUW);
  10605. setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
  10606. if (AR->hasNoUnsignedWrap()) {
  10607. // Emulate what getZeroExtendExpr would have done during construction
  10608. // if we'd been able to infer the fact just above at that time.
  10609. const SCEV *Step = AR->getStepRecurrence(*this);
  10610. Type *Ty = ZExt->getType();
  10611. auto *S = getAddRecExpr(
  10612. getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, 0),
  10613. getZeroExtendExpr(Step, Ty, 0), L, AR->getNoWrapFlags());
  10614. IV = dyn_cast<SCEVAddRecExpr>(S);
  10615. }
  10616. }
  10617. }
  10618. }
  10619. if (!IV && AllowPredicates) {
  10620. // Try to make this an AddRec using runtime tests, in the first X
  10621. // iterations of this loop, where X is the SCEV expression found by the
  10622. // algorithm below.
  10623. IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
  10624. PredicatedIV = true;
  10625. }
  10626. // Avoid weird loops
  10627. if (!IV || IV->getLoop() != L || !IV->isAffine())
  10628. return getCouldNotCompute();
  10629. // A precondition of this method is that the condition being analyzed
  10630. // reaches an exiting branch which dominates the latch. Given that, we can
  10631. // assume that an increment which violates the nowrap specification and
  10632. // produces poison must cause undefined behavior when the resulting poison
  10633. // value is branched upon and thus we can conclude that the backedge is
  10634. // taken no more often than would be required to produce that poison value.
  10635. // Note that a well defined loop can exit on the iteration which violates
  10636. // the nowrap specification if there is another exit (either explicit or
  10637. // implicit/exceptional) which causes the loop to execute before the
  10638. // exiting instruction we're analyzing would trigger UB.
  10639. auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW;
  10640. bool NoWrap = ControlsExit && IV->getNoWrapFlags(WrapType);
  10641. ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
  10642. const SCEV *Stride = IV->getStepRecurrence(*this);
  10643. bool PositiveStride = isKnownPositive(Stride);
  10644. // Avoid negative or zero stride values.
  10645. if (!PositiveStride) {
  10646. // We can compute the correct backedge taken count for loops with unknown
  10647. // strides if we can prove that the loop is not an infinite loop with side
  10648. // effects. Here's the loop structure we are trying to handle -
  10649. //
  10650. // i = start
  10651. // do {
  10652. // A[i] = i;
  10653. // i += s;
  10654. // } while (i < end);
  10655. //
  10656. // The backedge taken count for such loops is evaluated as -
  10657. // (max(end, start + stride) - start - 1) /u stride
  10658. //
  10659. // The additional preconditions that we need to check to prove correctness
  10660. // of the above formula is as follows -
  10661. //
  10662. // a) IV is either nuw or nsw depending upon signedness (indicated by the
  10663. // NoWrap flag).
  10664. // b) the loop is guaranteed to be finite (e.g. is mustprogress and has
  10665. // no side effects within the loop)
  10666. // c) loop has a single static exit (with no abnormal exits)
  10667. //
  10668. // Precondition a) implies that if the stride is negative, this is a single
  10669. // trip loop. The backedge taken count formula reduces to zero in this case.
  10670. //
  10671. // Precondition b) and c) combine to imply that if rhs is invariant in L,
  10672. // then a zero stride means the backedge can't be taken without executing
  10673. // undefined behavior.
  10674. //
  10675. // The positive stride case is the same as isKnownPositive(Stride) returning
  10676. // true (original behavior of the function).
  10677. //
  10678. if (PredicatedIV || !NoWrap || !loopIsFiniteByAssumption(L) ||
  10679. !loopHasNoAbnormalExits(L))
  10680. return getCouldNotCompute();
    // This bailout is protecting the logic in computeMaxBECountForLT which
    // has not yet been sufficiently audited or tested with negative strides.
    // We used to filter out all known-non-positive cases here; we're in the
    // process of being less restrictive bit by bit.
  10685. if (IsSigned && isKnownNonPositive(Stride))
  10686. return getCouldNotCompute();
  10687. if (!isKnownNonZero(Stride)) {
  10688. // If we have a step of zero, and RHS isn't invariant in L, we don't know
  10689. // if it might eventually be greater than start and if so, on which
  10690. // iteration. We can't even produce a useful upper bound.
  10691. if (!isLoopInvariant(RHS, L))
  10692. return getCouldNotCompute();
  10693. // We allow a potentially zero stride, but we need to divide by stride
  10694. // below. Since the loop can't be infinite and this check must control
  10695. // the sole exit, we can infer the exit must be taken on the first
  10696. // iteration (e.g. backedge count = 0) if the stride is zero. Given that,
  10697. // we know the numerator in the divides below must be zero, so we can
  10698. // pick an arbitrary non-zero value for the denominator (e.g. stride)
  10699. // and produce the right result.
  10700. // FIXME: Handle the case where Stride is poison?
  10701. auto wouldZeroStrideBeUB = [&]() {
  10702. // Proof by contradiction. Suppose the stride were zero. If we can
  10703. // prove that the backedge *is* taken on the first iteration, then since
  10704. // we know this condition controls the sole exit, we must have an
  10705. // infinite loop. We can't have a (well defined) infinite loop per
  10706. // check just above.
  10707. // Note: The (Start - Stride) term is used to get the start' term from
  10708. // (start' + stride,+,stride). Remember that we only care about the
  10709. // result of this expression when stride == 0 at runtime.
  10710. auto *StartIfZero = getMinusSCEV(IV->getStart(), Stride);
  10711. return isLoopEntryGuardedByCond(L, Cond, StartIfZero, RHS);
  10712. };
  10713. if (!wouldZeroStrideBeUB()) {
  10714. Stride = getUMaxExpr(Stride, getOne(Stride->getType()));
  10715. }
  10716. }
  10717. } else if (!Stride->isOne() && !NoWrap) {
  10718. auto isUBOnWrap = [&]() {
  10719. // From no-self-wrap, we need to then prove no-(un)signed-wrap. This
  10720. // follows trivially from the fact that every (un)signed-wrapped, but
  10721. // not self-wrapped value must be LT than the last value before
  10722. // (un)signed wrap. Since we know that last value didn't exit, nor
  10723. // will any smaller one.
  10724. return canAssumeNoSelfWrap(IV);
  10725. };
  10726. // Avoid proven overflow cases: this will ensure that the backedge taken
  10727. // count will not generate any unsigned overflow. Relaxed no-overflow
  10728. // conditions exploit NoWrapFlags, allowing to optimize in presence of
  10729. // undefined behaviors like the case of C language.
  10730. if (canIVOverflowOnLT(RHS, Stride, IsSigned) && !isUBOnWrap())
  10731. return getCouldNotCompute();
  10732. }
  // On all paths just preceding, we established the following invariant:
  10734. // IV can be assumed not to overflow up to and including the exiting
  10735. // iteration. We proved this in one of two ways:
  10736. // 1) We can show overflow doesn't occur before the exiting iteration
  //   1a) via canIVOverflowOnLT, and 1b) a step of one
  10738. // 2) We can show that if overflow occurs, the loop must execute UB
  10739. // before any possible exit.
  10740. // Note that we have not yet proved RHS invariant (in general).
  10741. const SCEV *Start = IV->getStart();
  10742. // Preserve pointer-typed Start/RHS to pass to isLoopEntryGuardedByCond.
  10743. // If we convert to integers, isLoopEntryGuardedByCond will miss some cases.
  10744. // Use integer-typed versions for actual computation; we can't subtract
  10745. // pointers in general.
  10746. const SCEV *OrigStart = Start;
  10747. const SCEV *OrigRHS = RHS;
  10748. if (Start->getType()->isPointerTy()) {
  10749. Start = getLosslessPtrToIntExpr(Start);
  10750. if (isa<SCEVCouldNotCompute>(Start))
  10751. return Start;
  10752. }
  10753. if (RHS->getType()->isPointerTy()) {
  10754. RHS = getLosslessPtrToIntExpr(RHS);
  10755. if (isa<SCEVCouldNotCompute>(RHS))
  10756. return RHS;
  10757. }
  10758. // When the RHS is not invariant, we do not know the end bound of the loop and
  10759. // cannot calculate the ExactBECount needed by ExitLimit. However, we can
  10760. // calculate the MaxBECount, given the start, stride and max value for the end
  10761. // bound of the loop (RHS), and the fact that IV does not overflow (which is
  10762. // checked above).
  10763. if (!isLoopInvariant(RHS, L)) {
  10764. const SCEV *MaxBECount = computeMaxBECountForLT(
  10765. Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
  10766. return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount,
  10767. false /*MaxOrZero*/, Predicates);
  10768. }
  10769. // We use the expression (max(End,Start)-Start)/Stride to describe the
10770. // backedge count: if the backedge is taken at least once, max(End,Start)
10771. // is End and the result is as above; if not, max(End,Start) is Start and
10772. // we get a backedge count of zero.
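// Worked example (added illustration with assumed values): for an unsigned
// "IV <u RHS" exit with Start = 5, Stride = 3, RHS = 12, the IV takes the
// values 5, 8, 11, 14 and the exit test fails on the fourth check, so the
// backedge is taken 3 times: ceil((max(12, 5) - 5) / 3) == ceil(7 / 3) == 3.
// With RHS = 4 instead, max(4, 5) == 5 and the count is ceil(0 / 3) == 0.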
  10773. const SCEV *BECount = nullptr;
  10774. auto *OrigStartMinusStride = getMinusSCEV(OrigStart, Stride);
  10775. assert(isAvailableAtLoopEntry(OrigStartMinusStride, L) && "Must be!");
  10776. assert(isAvailableAtLoopEntry(OrigStart, L) && "Must be!");
  10777. assert(isAvailableAtLoopEntry(OrigRHS, L) && "Must be!");
10778. // Can we prove max(RHS,Start) > Start - Stride?
  10779. if (isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigStart) &&
  10780. isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigRHS)) {
  10781. // In this case, we can use a refined formula for computing backedge taken
  10782. // count. The general formula remains:
  10783. // "End-Start /uceiling Stride" where "End = max(RHS,Start)"
  10784. // We want to use the alternate formula:
  10785. // "((End - 1) - (Start - Stride)) /u Stride"
  10786. // Let's do a quick case analysis to show these are equivalent under
  10787. // our precondition that max(RHS,Start) > Start - Stride.
  10788. // * For RHS <= Start, the backedge-taken count must be zero.
  10789. // "((End - 1) - (Start - Stride)) /u Stride" reduces to
  10790. // "((Start - 1) - (Start - Stride)) /u Stride" which simplies to
  10791. // "Stride - 1 /u Stride" which is indeed zero for all non-zero values
  10792. // of Stride. For 0 stride, we've use umin(1,Stride) above, reducing
  10793. // this to the stride of 1 case.
  10794. // * For RHS >= Start, the backedge count must be "RHS-Start /uceil Stride".
  10795. // "((End - 1) - (Start - Stride)) /u Stride" reduces to
  10796. // "((RHS - 1) - (Start - Stride)) /u Stride" reassociates to
  10797. // "((RHS - (Start - Stride) - 1) /u Stride".
  10798. // Our preconditions trivially imply no overflow in that form.
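// Worked example (added illustration with assumed values): with Start = 5,
// Stride = 3, RHS = 12 (unsigned), the precondition holds since
// max(12, 5) == 12 >u 5 - 3 == 2, and
//   ((12 - 1) - (5 - 3)) /u 3 == 9 /u 3 == 3,
// matching the three backedges taken for IV == 5, 8, 11.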
  10799. const SCEV *MinusOne = getMinusOne(Stride->getType());
  10800. const SCEV *Numerator =
  10801. getMinusSCEV(getAddExpr(RHS, MinusOne), getMinusSCEV(Start, Stride));
  10802. BECount = getUDivExpr(Numerator, Stride);
  10803. }
  10804. const SCEV *BECountIfBackedgeTaken = nullptr;
  10805. if (!BECount) {
  10806. auto canProveRHSGreaterThanEqualStart = [&]() {
  10807. auto CondGE = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
  10808. if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart))
  10809. return true;
  10810. // (RHS > Start - 1) implies RHS >= Start.
  10811. // * "RHS >= Start" is trivially equivalent to "RHS > Start - 1" if
  10812. // "Start - 1" doesn't overflow.
  10813. // * For signed comparison, if Start - 1 does overflow, it's equal
  10814. // to INT_MAX, and "RHS >s INT_MAX" is trivially false.
  10815. // * For unsigned comparison, if Start - 1 does overflow, it's equal
  10816. // to UINT_MAX, and "RHS >u UINT_MAX" is trivially false.
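// For instance (added note): with unsigned 8-bit values and Start == 0,
// "Start - 1" wraps to 255 == UINT8_MAX and "RHS >u 255" can never hold, so
// we merely fail to prove the guard rather than drawing a wrong conclusion.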
  10817. //
  10818. // FIXME: Should isLoopEntryGuardedByCond do this for us?
  10819. auto CondGT = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
  10820. auto *StartMinusOne = getAddExpr(OrigStart,
  10821. getMinusOne(OrigStart->getType()));
  10822. return isLoopEntryGuardedByCond(L, CondGT, OrigRHS, StartMinusOne);
  10823. };
  10824. // If we know that RHS >= Start in the context of loop, then we know that
  10825. // max(RHS, Start) = RHS at this point.
  10826. const SCEV *End;
  10827. if (canProveRHSGreaterThanEqualStart()) {
  10828. End = RHS;
  10829. } else {
  10830. // If RHS < Start, the backedge will be taken zero times. So in
  10831. // general, we can write the backedge-taken count as:
  10832. //
10833. // RHS >= Start ? ceil((RHS - Start) / Stride) : 0
  10834. //
  10835. // We convert it to the following to make it more convenient for SCEV:
  10836. //
10837. // ceil((max(RHS, Start) - Start) / Stride)
  10838. End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
  10839. // See what would happen if we assume the backedge is taken. This is
  10840. // used to compute MaxBECount.
  10841. BECountIfBackedgeTaken = getUDivCeilSCEV(getMinusSCEV(RHS, Start), Stride);
  10842. }
  10843. // At this point, we know:
  10844. //
  10845. // 1. If IsSigned, Start <=s End; otherwise, Start <=u End
  10846. // 2. The index variable doesn't overflow.
  10847. //
  10848. // Therefore, we know N exists such that
  10849. // (Start + Stride * N) >= End, and computing "(Start + Stride * N)"
  10850. // doesn't overflow.
  10851. //
  10852. // Using this information, try to prove whether the addition in
  10853. // "(Start - End) + (Stride - 1)" has unsigned overflow.
  10854. const SCEV *One = getOne(Stride->getType());
  10855. bool MayAddOverflow = [&] {
  10856. if (auto *StrideC = dyn_cast<SCEVConstant>(Stride)) {
  10857. if (StrideC->getAPInt().isPowerOf2()) {
  10858. // Suppose Stride is a power of two, and Start/End are unsigned
  10859. // integers. Let UMAX be the largest representable unsigned
  10860. // integer.
  10861. //
  10862. // By the preconditions of this function, we know
  10863. // "(Start + Stride * N) >= End", and this doesn't overflow.
  10864. // As a formula:
  10865. //
  10866. // End <= (Start + Stride * N) <= UMAX
  10867. //
  10868. // Subtracting Start from all the terms:
  10869. //
  10870. // End - Start <= Stride * N <= UMAX - Start
  10871. //
  10872. // Since Start is unsigned, UMAX - Start <= UMAX. Therefore:
  10873. //
  10874. // End - Start <= Stride * N <= UMAX
  10875. //
  10876. // Stride * N is a multiple of Stride. Therefore,
  10877. //
  10878. // End - Start <= Stride * N <= UMAX - (UMAX mod Stride)
  10879. //
  10880. // Since Stride is a power of two, UMAX + 1 is divisible by Stride.
  10881. // Therefore, UMAX mod Stride == Stride - 1. So we can write:
  10882. //
10883. // End - Start <= Stride * N <= UMAX - (Stride - 1)
  10884. //
  10885. // Dropping the middle term:
  10886. //
10887. // End - Start <= UMAX - (Stride - 1)
  10888. //
  10889. // Adding Stride - 1 to both sides:
  10890. //
  10891. // (End - Start) + (Stride - 1) <= UMAX
  10892. //
  10893. // In other words, the addition doesn't have unsigned overflow.
  10894. //
  10895. // A similar proof works if we treat Start/End as signed values.
  10896. // Just rewrite steps before "End - Start <= Stride * N <= UMAX" to
  10897. // use signed max instead of unsigned max. Note that we're trying
  10898. // to prove a lack of unsigned overflow in either case.
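// Numeric illustration (added, with assumed 8-bit values): UMAX == 255 and
// Stride == 8 give UMAX mod Stride == 7 == Stride - 1, so
// End - Start <= 255 - 7 == 248 and (End - Start) + 7 <= 255, i.e. the
// addition cannot wrap.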
  10899. return false;
  10900. }
  10901. }
  10902. if (Start == Stride || Start == getMinusSCEV(Stride, One)) {
  10903. // If Start is equal to Stride, (End - Start) + (Stride - 1) == End - 1.
  10904. // If !IsSigned, 0 <u Stride == Start <=u End; so 0 <u End - 1 <u End.
  10905. // If IsSigned, 0 <s Stride == Start <=s End; so 0 <s End - 1 <s End.
  10906. //
  10907. // If Start is equal to Stride - 1, (End - Start) + Stride - 1 == End.
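// Example (added illustration): Start == Stride == 4 and End == 20 give
// (End - Start) + (Stride - 1) == 16 + 3 == 19 == End - 1, which is strictly
// less than End, so the addition cannot have wrapped.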
  10908. return false;
  10909. }
  10910. return true;
  10911. }();
  10912. const SCEV *Delta = getMinusSCEV(End, Start);
  10913. if (!MayAddOverflow) {
  10914. // floor((D + (S - 1)) / S)
  10915. // We prefer this formulation if it's legal because it's fewer operations.
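// For instance (added note): D == 7, S == 3 gives
// floor((7 + 2) / 3) == 3 == ceil(7 / 3), provided the addition 7 + 2 does
// not wrap, which is exactly what !MayAddOverflow guarantees here.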
  10916. BECount =
  10917. getUDivExpr(getAddExpr(Delta, getMinusSCEV(Stride, One)), Stride);
  10918. } else {
  10919. BECount = getUDivCeilSCEV(Delta, Stride);
  10920. }
  10921. }
  10922. const SCEV *MaxBECount;
  10923. bool MaxOrZero = false;
  10924. if (isa<SCEVConstant>(BECount)) {
  10925. MaxBECount = BECount;
  10926. } else if (BECountIfBackedgeTaken &&
  10927. isa<SCEVConstant>(BECountIfBackedgeTaken)) {
  10928. // If we know exactly how many times the backedge will be taken if it's
  10929. // taken at least once, then the backedge count will either be that or
  10930. // zero.
  10931. MaxBECount = BECountIfBackedgeTaken;
  10932. MaxOrZero = true;
  10933. } else {
  10934. MaxBECount = computeMaxBECountForLT(
  10935. Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
  10936. }
  10937. if (isa<SCEVCouldNotCompute>(MaxBECount) &&
  10938. !isa<SCEVCouldNotCompute>(BECount))
  10939. MaxBECount = getConstant(getUnsignedRangeMax(BECount));
  10940. return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
  10941. }
  10942. ScalarEvolution::ExitLimit
  10943. ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
  10944. const Loop *L, bool IsSigned,
  10945. bool ControlsExit, bool AllowPredicates) {
  10946. SmallPtrSet<const SCEVPredicate *, 4> Predicates;
  10947. // We handle only IV > Invariant
  10948. if (!isLoopInvariant(RHS, L))
  10949. return getCouldNotCompute();
  10950. const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
  10951. if (!IV && AllowPredicates)
  10952. // Try to make this an AddRec using runtime tests, in the first X
  10953. // iterations of this loop, where X is the SCEV expression found by the
  10954. // algorithm below.
  10955. IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
  10956. // Avoid weird loops
  10957. if (!IV || IV->getLoop() != L || !IV->isAffine())
  10958. return getCouldNotCompute();
  10959. auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW;
  10960. bool NoWrap = ControlsExit && IV->getNoWrapFlags(WrapType);
  10961. ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
  10962. const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
  10963. // Avoid negative or zero stride values
  10964. if (!isKnownPositive(Stride))
  10965. return getCouldNotCompute();
  10966. // Avoid proven overflow cases: this will ensure that the backedge taken count
  10967. // will not generate any unsigned overflow. Relaxed no-overflow conditions
10968. // exploit NoWrapFlags, allowing us to optimize in the presence of undefined
10969. // behavior, as in the case of the C language.
  10970. if (!Stride->isOne() && !NoWrap)
  10971. if (canIVOverflowOnGT(RHS, Stride, IsSigned))
  10972. return getCouldNotCompute();
  10973. const SCEV *Start = IV->getStart();
  10974. const SCEV *End = RHS;
  10975. if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) {
  10976. // If we know that Start >= RHS in the context of loop, then we know that
  10977. // min(RHS, Start) = RHS at this point.
  10978. if (isLoopEntryGuardedByCond(
  10979. L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, Start, RHS))
  10980. End = RHS;
  10981. else
  10982. End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start);
  10983. }
  10984. if (Start->getType()->isPointerTy()) {
  10985. Start = getLosslessPtrToIntExpr(Start);
  10986. if (isa<SCEVCouldNotCompute>(Start))
  10987. return Start;
  10988. }
  10989. if (End->getType()->isPointerTy()) {
  10990. End = getLosslessPtrToIntExpr(End);
  10991. if (isa<SCEVCouldNotCompute>(End))
  10992. return End;
  10993. }
  10994. // Compute ((Start - End) + (Stride - 1)) / Stride.
  10995. // FIXME: This can overflow. Holding off on fixing this for now;
  10996. // howManyGreaterThans will hopefully be gone soon.
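// Worked example (added illustration with assumed values): a decreasing IV
// starting at 20 with step -3 (so Stride == 3) and RHS == 10 takes the
// values 20, 17, 14, 11, 8; the "IV >u 10" test fails on the fifth check,
// so the backedge is taken 4 times, and with End == RHS == 10:
//   ((20 - 10) + (3 - 1)) /u 3 == 12 /u 3 == 4.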
  10997. const SCEV *One = getOne(Stride->getType());
  10998. const SCEV *BECount = getUDivExpr(
  10999. getAddExpr(getMinusSCEV(Start, End), getMinusSCEV(Stride, One)), Stride);
  11000. APInt MaxStart = IsSigned ? getSignedRangeMax(Start)
  11001. : getUnsignedRangeMax(Start);
  11002. APInt MinStride = IsSigned ? getSignedRangeMin(Stride)
  11003. : getUnsignedRangeMin(Stride);
  11004. unsigned BitWidth = getTypeSizeInBits(LHS->getType());
  11005. APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
  11006. : APInt::getMinValue(BitWidth) + (MinStride - 1);
  11007. // Although End can be a MIN expression we estimate MinEnd considering only
  11008. // the case End = RHS. This is safe because in the other case (Start - End)
  11009. // is zero, leading to a zero maximum backedge taken count.
  11010. APInt MinEnd =
  11011. IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit)
  11012. : APIntOps::umax(getUnsignedRangeMin(RHS), Limit);
  11013. const SCEV *MaxBECount = isa<SCEVConstant>(BECount)
  11014. ? BECount
  11015. : getUDivCeilSCEV(getConstant(MaxStart - MinEnd),
  11016. getConstant(MinStride));
  11017. if (isa<SCEVCouldNotCompute>(MaxBECount))
  11018. MaxBECount = BECount;
  11019. return ExitLimit(BECount, MaxBECount, false, Predicates);
  11020. }
  11021. const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
  11022. ScalarEvolution &SE) const {
  11023. if (Range.isFullSet()) // Infinite loop.
  11024. return SE.getCouldNotCompute();
  11025. // If the start is a non-zero constant, shift the range to simplify things.
  11026. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
  11027. if (!SC->getValue()->isZero()) {
  11028. SmallVector<const SCEV *, 4> Operands(operands());
  11029. Operands[0] = SE.getZero(SC->getType());
  11030. const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
  11031. getNoWrapFlags(FlagNW));
  11032. if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))
  11033. return ShiftedAddRec->getNumIterationsInRange(
  11034. Range.subtract(SC->getAPInt()), SE);
  11035. // This is strange and shouldn't happen.
  11036. return SE.getCouldNotCompute();
  11037. }
  11038. // The only time we can solve this is when we have all constant indices.
  11039. // Otherwise, we cannot determine the overflow conditions.
  11040. if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); }))
  11041. return SE.getCouldNotCompute();
  11042. // Okay at this point we know that all elements of the chrec are constants and
  11043. // that the start element is zero.
  11044. // First check to see if the range contains zero. If not, the first
  11045. // iteration exits.
  11046. unsigned BitWidth = SE.getTypeSizeInBits(getType());
  11047. if (!Range.contains(APInt(BitWidth, 0)))
  11048. return SE.getZero(getType());
  11049. if (isAffine()) {
  11050. // If this is an affine expression then we have this situation:
  11051. // Solve {0,+,A} in Range === Ax in Range
  11052. // We know that zero is in the range. If A is positive then we know that
  11053. // the upper value of the range must be the first possible exit value.
  11054. // If A is negative then the lower of the range is the last possible loop
  11055. // value. Also note that we already checked for a full range.
  11056. APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
  11057. APInt End = A.sge(1) ? (Range.getUpper() - 1) : Range.getLower();
  11058. // The exit value should be (End+A)/A.
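// Example (added illustration): for {0,+,3} and Range == [0, 10), we get
// End == 10 - 1 == 9 and ExitVal == (9 + 3) udiv 3 == 4; the value at
// iteration 4 is 12, the first one outside the range, while the value at
// iteration 3 is 9, which is still inside it.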
  11059. APInt ExitVal = (End + A).udiv(A);
  11060. ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
  11061. // Evaluate at the exit value. If we really did fall out of the valid
  11062. // range, then we computed our trip count, otherwise wrap around or other
  11063. // things must have happened.
  11064. ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
  11065. if (Range.contains(Val->getValue()))
  11066. return SE.getCouldNotCompute(); // Something strange happened
  11067. // Ensure that the previous value is in the range.
  11068. assert(Range.contains(
  11069. EvaluateConstantChrecAtConstant(this,
  11070. ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) &&
  11071. "Linear scev computation is off in a bad way!");
  11072. return SE.getConstant(ExitValue);
  11073. }
  11074. if (isQuadratic()) {
  11075. if (auto S = SolveQuadraticAddRecRange(this, Range, SE))
  11076. return SE.getConstant(S.getValue());
  11077. }
  11078. return SE.getCouldNotCompute();
  11079. }
  11080. const SCEVAddRecExpr *
  11081. SCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const {
  11082. assert(getNumOperands() > 1 && "AddRec with zero step?");
  11083. // There is a temptation to just call getAddExpr(this, getStepRecurrence(SE)),
  11084. // but in this case we cannot guarantee that the value returned will be an
  11085. // AddRec because SCEV does not have a fixed point where it stops
  11086. // simplification: it is legal to return ({rec1} + {rec2}). For example, it
  11087. // may happen if we reach arithmetic depth limit while simplifying. So we
  11088. // construct the returned value explicitly.
  11089. SmallVector<const SCEV *, 3> Ops;
  11090. // If this is {A,+,B,+,C,...,+,N}, then its step is {B,+,C,+,...,+,N}, and
  11091. // (this + Step) is {A+B,+,B+C,+...,+,N}.
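// Example (added illustration): for {1,+,2,+,3} the step is {2,+,3} and the
// recurrence built below is {1+2,+,2+3,+,3} == {3,+,5,+,3}, whose value at
// iteration i equals the original recurrence's value at iteration i+1.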
  11092. for (unsigned i = 0, e = getNumOperands() - 1; i < e; ++i)
  11093. Ops.push_back(SE.getAddExpr(getOperand(i), getOperand(i + 1)));
  11094. // We know that the last operand is not a constant zero (otherwise it would
  11095. // have been popped out earlier). This guarantees us that if the result has
  11096. // the same last operand, then it will also not be popped out, meaning that
  11097. // the returned value will be an AddRec.
  11098. const SCEV *Last = getOperand(getNumOperands() - 1);
11099. assert(!Last->isZero() && "Recurrence with zero step?");
  11100. Ops.push_back(Last);
  11101. return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, getLoop(),
  11102. SCEV::FlagAnyWrap));
  11103. }
11104. // Return true when S contains at least one undef value.
  11105. bool ScalarEvolution::containsUndefs(const SCEV *S) const {
  11106. return SCEVExprContains(S, [](const SCEV *S) {
  11107. if (const auto *SU = dyn_cast<SCEVUnknown>(S))
  11108. return isa<UndefValue>(SU->getValue());
  11109. return false;
  11110. });
  11111. }
  11112. /// Return the size of an element read or written by Inst.
  11113. const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
  11114. Type *Ty;
  11115. if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
  11116. Ty = Store->getValueOperand()->getType();
  11117. else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
  11118. Ty = Load->getType();
  11119. else
  11120. return nullptr;
  11121. Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
  11122. return getSizeOfExpr(ETy, Ty);
  11123. }
  11124. //===----------------------------------------------------------------------===//
  11125. // SCEVCallbackVH Class Implementation
  11126. //===----------------------------------------------------------------------===//
  11127. void ScalarEvolution::SCEVCallbackVH::deleted() {
  11128. assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
  11129. if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
  11130. SE->ConstantEvolutionLoopExitValue.erase(PN);
  11131. SE->eraseValueFromMap(getValPtr());
  11132. // this now dangles!
  11133. }
  11134. void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
  11135. assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
  11136. // Forget all the expressions associated with users of the old value,
  11137. // so that future queries will recompute the expressions using the new
  11138. // value.
  11139. Value *Old = getValPtr();
  11140. SmallVector<User *, 16> Worklist(Old->users());
  11141. SmallPtrSet<User *, 8> Visited;
  11142. while (!Worklist.empty()) {
  11143. User *U = Worklist.pop_back_val();
  11144. // Deleting the Old value will cause this to dangle. Postpone
  11145. // that until everything else is done.
  11146. if (U == Old)
  11147. continue;
  11148. if (!Visited.insert(U).second)
  11149. continue;
  11150. if (PHINode *PN = dyn_cast<PHINode>(U))
  11151. SE->ConstantEvolutionLoopExitValue.erase(PN);
  11152. SE->eraseValueFromMap(U);
  11153. llvm::append_range(Worklist, U->users());
  11154. }
  11155. // Delete the Old value.
  11156. if (PHINode *PN = dyn_cast<PHINode>(Old))
  11157. SE->ConstantEvolutionLoopExitValue.erase(PN);
  11158. SE->eraseValueFromMap(Old);
  11159. // this now dangles!
  11160. }
  11161. ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
  11162. : CallbackVH(V), SE(se) {}
  11163. //===----------------------------------------------------------------------===//
  11164. // ScalarEvolution Class Implementation
  11165. //===----------------------------------------------------------------------===//
  11166. ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
  11167. AssumptionCache &AC, DominatorTree &DT,
  11168. LoopInfo &LI)
  11169. : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
  11170. CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64),
  11171. LoopDispositions(64), BlockDispositions(64) {
  11172. // To use guards for proving predicates, we need to scan every instruction in
  11173. // relevant basic blocks, and not just terminators. Doing this is a waste of
  11174. // time if the IR does not actually contain any calls to
  11175. // @llvm.experimental.guard, so do a quick check and remember this beforehand.
  11176. //
  11177. // This pessimizes the case where a pass that preserves ScalarEvolution wants
  11178. // to _add_ guards to the module when there weren't any before, and wants
  11179. // ScalarEvolution to optimize based on those guards. For now we prefer to be
  11180. // efficient in lieu of being smart in that rather obscure case.
  11181. auto *GuardDecl = F.getParent()->getFunction(
  11182. Intrinsic::getName(Intrinsic::experimental_guard));
  11183. HasGuards = GuardDecl && !GuardDecl->use_empty();
  11184. }
  11185. ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
  11186. : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT),
  11187. LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
  11188. ValueExprMap(std::move(Arg.ValueExprMap)),
  11189. PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
  11190. PendingPhiRanges(std::move(Arg.PendingPhiRanges)),
  11191. PendingMerges(std::move(Arg.PendingMerges)),
  11192. MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)),
  11193. BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
  11194. PredicatedBackedgeTakenCounts(
  11195. std::move(Arg.PredicatedBackedgeTakenCounts)),
  11196. BECountUsers(std::move(Arg.BECountUsers)),
  11197. ConstantEvolutionLoopExitValue(
  11198. std::move(Arg.ConstantEvolutionLoopExitValue)),
  11199. ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
  11200. ValuesAtScopesUsers(std::move(Arg.ValuesAtScopesUsers)),
  11201. LoopDispositions(std::move(Arg.LoopDispositions)),
  11202. LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
  11203. BlockDispositions(std::move(Arg.BlockDispositions)),
  11204. SCEVUsers(std::move(Arg.SCEVUsers)),
  11205. UnsignedRanges(std::move(Arg.UnsignedRanges)),
  11206. SignedRanges(std::move(Arg.SignedRanges)),
  11207. UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
  11208. UniquePreds(std::move(Arg.UniquePreds)),
  11209. SCEVAllocator(std::move(Arg.SCEVAllocator)),
  11210. LoopUsers(std::move(Arg.LoopUsers)),
  11211. PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)),
  11212. FirstUnknown(Arg.FirstUnknown) {
  11213. Arg.FirstUnknown = nullptr;
  11214. }
  11215. ScalarEvolution::~ScalarEvolution() {
  11216. // Iterate through all the SCEVUnknown instances and call their
  11217. // destructors, so that they release their references to their values.
  11218. for (SCEVUnknown *U = FirstUnknown; U;) {
  11219. SCEVUnknown *Tmp = U;
  11220. U = U->Next;
  11221. Tmp->~SCEVUnknown();
  11222. }
  11223. FirstUnknown = nullptr;
  11224. ExprValueMap.clear();
  11225. ValueExprMap.clear();
  11226. HasRecMap.clear();
  11227. BackedgeTakenCounts.clear();
  11228. PredicatedBackedgeTakenCounts.clear();
  11229. assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
  11230. assert(PendingPhiRanges.empty() && "getRangeRef garbage");
  11231. assert(PendingMerges.empty() && "isImpliedViaMerge garbage");
  11232. assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
  11233. assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
  11234. }
  11235. bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
  11236. return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
  11237. }
  11238. static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
  11239. const Loop *L) {
  11240. // Print all inner loops first
  11241. for (Loop *I : *L)
  11242. PrintLoopInfo(OS, SE, I);
  11243. OS << "Loop ";
  11244. L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  11245. OS << ": ";
  11246. SmallVector<BasicBlock *, 8> ExitingBlocks;
  11247. L->getExitingBlocks(ExitingBlocks);
  11248. if (ExitingBlocks.size() != 1)
  11249. OS << "<multiple exits> ";
  11250. if (SE->hasLoopInvariantBackedgeTakenCount(L))
  11251. OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L) << "\n";
  11252. else
  11253. OS << "Unpredictable backedge-taken count.\n";
  11254. if (ExitingBlocks.size() > 1)
  11255. for (BasicBlock *ExitingBlock : ExitingBlocks) {
  11256. OS << " exit count for " << ExitingBlock->getName() << ": "
  11257. << *SE->getExitCount(L, ExitingBlock) << "\n";
  11258. }
  11259. OS << "Loop ";
  11260. L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  11261. OS << ": ";
  11262. if (!isa<SCEVCouldNotCompute>(SE->getConstantMaxBackedgeTakenCount(L))) {
  11263. OS << "max backedge-taken count is " << *SE->getConstantMaxBackedgeTakenCount(L);
  11264. if (SE->isBackedgeTakenCountMaxOrZero(L))
  11265. OS << ", actual taken count either this or zero.";
  11266. } else {
  11267. OS << "Unpredictable max backedge-taken count. ";
  11268. }
  11269. OS << "\n"
  11270. "Loop ";
  11271. L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  11272. OS << ": ";
  11273. SCEVUnionPredicate Pred;
  11274. auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred);
  11275. if (!isa<SCEVCouldNotCompute>(PBT)) {
  11276. OS << "Predicated backedge-taken count is " << *PBT << "\n";
  11277. OS << " Predicates:\n";
  11278. Pred.print(OS, 4);
  11279. } else {
  11280. OS << "Unpredictable predicated backedge-taken count. ";
  11281. }
  11282. OS << "\n";
  11283. if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
  11284. OS << "Loop ";
  11285. L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  11286. OS << ": ";
  11287. OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n";
  11288. }
  11289. }
  11290. static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) {
  11291. switch (LD) {
  11292. case ScalarEvolution::LoopVariant:
  11293. return "Variant";
  11294. case ScalarEvolution::LoopInvariant:
  11295. return "Invariant";
  11296. case ScalarEvolution::LoopComputable:
  11297. return "Computable";
  11298. }
  11299. llvm_unreachable("Unknown ScalarEvolution::LoopDisposition kind!");
  11300. }
  11301. void ScalarEvolution::print(raw_ostream &OS) const {
  11302. // ScalarEvolution's implementation of the print method is to print
  11303. // out SCEV values of all instructions that are interesting. Doing
  11304. // this potentially causes it to create new SCEV objects though,
  11305. // which technically conflicts with the const qualifier. This isn't
  11306. // observable from outside the class though, so casting away the
  11307. // const isn't dangerous.
  11308. ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
  11309. if (ClassifyExpressions) {
  11310. OS << "Classifying expressions for: ";
  11311. F.printAsOperand(OS, /*PrintType=*/false);
  11312. OS << "\n";
  11313. for (Instruction &I : instructions(F))
  11314. if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
  11315. OS << I << '\n';
  11316. OS << " --> ";
  11317. const SCEV *SV = SE.getSCEV(&I);
  11318. SV->print(OS);
  11319. if (!isa<SCEVCouldNotCompute>(SV)) {
  11320. OS << " U: ";
  11321. SE.getUnsignedRange(SV).print(OS);
  11322. OS << " S: ";
  11323. SE.getSignedRange(SV).print(OS);
  11324. }
  11325. const Loop *L = LI.getLoopFor(I.getParent());
  11326. const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
  11327. if (AtUse != SV) {
  11328. OS << " --> ";
  11329. AtUse->print(OS);
  11330. if (!isa<SCEVCouldNotCompute>(AtUse)) {
  11331. OS << " U: ";
  11332. SE.getUnsignedRange(AtUse).print(OS);
  11333. OS << " S: ";
  11334. SE.getSignedRange(AtUse).print(OS);
  11335. }
  11336. }
  11337. if (L) {
  11338. OS << "\t\t" "Exits: ";
  11339. const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
  11340. if (!SE.isLoopInvariant(ExitValue, L)) {
  11341. OS << "<<Unknown>>";
  11342. } else {
  11343. OS << *ExitValue;
  11344. }
  11345. bool First = true;
  11346. for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
  11347. if (First) {
  11348. OS << "\t\t" "LoopDispositions: { ";
  11349. First = false;
  11350. } else {
  11351. OS << ", ";
  11352. }
  11353. Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  11354. OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter));
  11355. }
  11356. for (auto *InnerL : depth_first(L)) {
  11357. if (InnerL == L)
  11358. continue;
  11359. if (First) {
  11360. OS << "\t\t" "LoopDispositions: { ";
  11361. First = false;
  11362. } else {
  11363. OS << ", ";
  11364. }
  11365. InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  11366. OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL));
  11367. }
  11368. OS << " }";
  11369. }
  11370. OS << "\n";
  11371. }
  11372. }
  11373. OS << "Determining loop execution counts for: ";
  11374. F.printAsOperand(OS, /*PrintType=*/false);
  11375. OS << "\n";
  11376. for (Loop *I : LI)
  11377. PrintLoopInfo(OS, &SE, I);
  11378. }
  11379. ScalarEvolution::LoopDisposition
  11380. ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
  11381. auto &Values = LoopDispositions[S];
  11382. for (auto &V : Values) {
  11383. if (V.getPointer() == L)
  11384. return V.getInt();
  11385. }
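// Note (added comment): the entry is seeded with the conservative
// LoopVariant answer before recursing, and looked up again afterwards,
// presumably because computeLoopDisposition can itself query dispositions
// and may grow LoopDispositions, invalidating the reference obtained above.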
  11386. Values.emplace_back(L, LoopVariant);
  11387. LoopDisposition D = computeLoopDisposition(S, L);
  11388. auto &Values2 = LoopDispositions[S];
  11389. for (auto &V : llvm::reverse(Values2)) {
  11390. if (V.getPointer() == L) {
  11391. V.setInt(D);
  11392. break;
  11393. }
  11394. }
  11395. return D;
  11396. }
  11397. ScalarEvolution::LoopDisposition
  11398. ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
  11399. switch (S->getSCEVType()) {
  11400. case scConstant:
  11401. return LoopInvariant;
  11402. case scPtrToInt:
  11403. case scTruncate:
  11404. case scZeroExtend:
  11405. case scSignExtend:
  11406. return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
  11407. case scAddRecExpr: {
  11408. const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
  11409. // If L is the addrec's loop, it's computable.
  11410. if (AR->getLoop() == L)
  11411. return LoopComputable;
  11412. // Add recurrences are never invariant in the function-body (null loop).
  11413. if (!L)
  11414. return LoopVariant;
  11415. // Everything that is not defined at loop entry is variant.
  11416. if (DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))
  11417. return LoopVariant;
  11418. assert(!L->contains(AR->getLoop()) && "Containing loop's header does not"
  11419. " dominate the contained loop's header?");
  11420. // This recurrence is invariant w.r.t. L if AR's loop contains L.
  11421. if (AR->getLoop()->contains(L))
  11422. return LoopInvariant;
  11423. // This recurrence is variant w.r.t. L if any of its operands
  11424. // are variant.
  11425. for (auto *Op : AR->operands())
  11426. if (!isLoopInvariant(Op, L))
  11427. return LoopVariant;
  11428. // Otherwise it's loop-invariant.
  11429. return LoopInvariant;
  11430. }
  11431. case scAddExpr:
  11432. case scMulExpr:
  11433. case scUMaxExpr:
  11434. case scSMaxExpr:
  11435. case scUMinExpr:
  11436. case scSMinExpr:
  11437. case scSequentialUMinExpr: {
  11438. bool HasVarying = false;
  11439. for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
  11440. LoopDisposition D = getLoopDisposition(Op, L);
  11441. if (D == LoopVariant)
  11442. return LoopVariant;
  11443. if (D == LoopComputable)
  11444. HasVarying = true;
  11445. }
  11446. return HasVarying ? LoopComputable : LoopInvariant;
  11447. }
  11448. case scUDivExpr: {
  11449. const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
  11450. LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
  11451. if (LD == LoopVariant)
  11452. return LoopVariant;
  11453. LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
  11454. if (RD == LoopVariant)
  11455. return LoopVariant;
  11456. return (LD == LoopInvariant && RD == LoopInvariant) ?
  11457. LoopInvariant : LoopComputable;
  11458. }
  11459. case scUnknown:
  11460. // All non-instruction values are loop invariant. All instructions are loop
  11461. // invariant if they are not contained in the specified loop.
  11462. // Instructions are never considered invariant in the function body
  11463. // (null loop) because they are defined within the "loop".
  11464. if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
  11465. return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
  11466. return LoopInvariant;
  11467. case scCouldNotCompute:
  11468. llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  11469. }
  11470. llvm_unreachable("Unknown SCEV kind!");
  11471. }
  11472. bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
  11473. return getLoopDisposition(S, L) == LoopInvariant;
  11474. }
  11475. bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
  11476. return getLoopDisposition(S, L) == LoopComputable;
  11477. }
  11478. ScalarEvolution::BlockDisposition
  11479. ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  11480. auto &Values = BlockDispositions[S];
  11481. for (auto &V : Values) {
  11482. if (V.getPointer() == BB)
  11483. return V.getInt();
  11484. }
  11485. Values.emplace_back(BB, DoesNotDominateBlock);
  11486. BlockDisposition D = computeBlockDisposition(S, BB);
  11487. auto &Values2 = BlockDispositions[S];
  11488. for (auto &V : llvm::reverse(Values2)) {
  11489. if (V.getPointer() == BB) {
  11490. V.setInt(D);
  11491. break;
  11492. }
  11493. }
  11494. return D;
  11495. }
  11496. ScalarEvolution::BlockDisposition
  11497. ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  11498. switch (S->getSCEVType()) {
  11499. case scConstant:
  11500. return ProperlyDominatesBlock;
  11501. case scPtrToInt:
  11502. case scTruncate:
  11503. case scZeroExtend:
  11504. case scSignExtend:
  11505. return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
  11506. case scAddRecExpr: {
  11507. // This uses a "dominates" query instead of "properly dominates" query
  11508. // to test for proper dominance too, because the instruction which
  11509. // produces the addrec's value is a PHI, and a PHI effectively properly
  11510. // dominates its entire containing block.
  11511. const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
  11512. if (!DT.dominates(AR->getLoop()->getHeader(), BB))
  11513. return DoesNotDominateBlock;
  11514. // Fall through into SCEVNAryExpr handling.
  11515. LLVM_FALLTHROUGH;
  11516. }
  11517. case scAddExpr:
  11518. case scMulExpr:
  11519. case scUMaxExpr:
  11520. case scSMaxExpr:
  11521. case scUMinExpr:
  11522. case scSMinExpr:
  11523. case scSequentialUMinExpr: {
  11524. const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
  11525. bool Proper = true;
  11526. for (const SCEV *NAryOp : NAry->operands()) {
  11527. BlockDisposition D = getBlockDisposition(NAryOp, BB);
  11528. if (D == DoesNotDominateBlock)
  11529. return DoesNotDominateBlock;
  11530. if (D == DominatesBlock)
  11531. Proper = false;
  11532. }
  11533. return Proper ? ProperlyDominatesBlock : DominatesBlock;
  11534. }
  11535. case scUDivExpr: {
  11536. const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
  11537. const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
  11538. BlockDisposition LD = getBlockDisposition(LHS, BB);
  11539. if (LD == DoesNotDominateBlock)
  11540. return DoesNotDominateBlock;
  11541. BlockDisposition RD = getBlockDisposition(RHS, BB);
  11542. if (RD == DoesNotDominateBlock)
  11543. return DoesNotDominateBlock;
  11544. return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
  11545. ProperlyDominatesBlock : DominatesBlock;
  11546. }
  11547. case scUnknown:
  11548. if (Instruction *I =
  11549. dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
  11550. if (I->getParent() == BB)
  11551. return DominatesBlock;
  11552. if (DT.properlyDominates(I->getParent(), BB))
  11553. return ProperlyDominatesBlock;
  11554. return DoesNotDominateBlock;
  11555. }
  11556. return ProperlyDominatesBlock;
  11557. case scCouldNotCompute:
  11558. llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  11559. }
  11560. llvm_unreachable("Unknown SCEV kind!");
  11561. }
  11562. bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
  11563. return getBlockDisposition(S, BB) >= DominatesBlock;
  11564. }
  11565. bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
  11566. return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
  11567. }
  11568. bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
  11569. return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
  11570. }
  11571. void ScalarEvolution::forgetBackedgeTakenCounts(const Loop *L,
  11572. bool Predicated) {
  11573. auto &BECounts =
  11574. Predicated ? PredicatedBackedgeTakenCounts : BackedgeTakenCounts;
  11575. auto It = BECounts.find(L);
  11576. if (It != BECounts.end()) {
  11577. for (const ExitNotTakenInfo &ENT : It->second.ExitNotTaken) {
  11578. if (!isa<SCEVConstant>(ENT.ExactNotTaken)) {
  11579. auto UserIt = BECountUsers.find(ENT.ExactNotTaken);
  11580. assert(UserIt != BECountUsers.end());
  11581. UserIt->second.erase({L, Predicated});
  11582. }
  11583. }
  11584. BECounts.erase(It);
  11585. }
  11586. }
  11587. void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) {
  11588. SmallPtrSet<const SCEV *, 8> ToForget(SCEVs.begin(), SCEVs.end());
  11589. SmallVector<const SCEV *, 8> Worklist(ToForget.begin(), ToForget.end());
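// Transitively collect every SCEV that (directly or indirectly) uses one of
// the given expressions, so that their memoized results are forgotten below
// as well.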
  11590. while (!Worklist.empty()) {
  11591. const SCEV *Curr = Worklist.pop_back_val();
  11592. auto Users = SCEVUsers.find(Curr);
  11593. if (Users != SCEVUsers.end())
  11594. for (auto *User : Users->second)
  11595. if (ToForget.insert(User).second)
  11596. Worklist.push_back(User);
  11597. }
  11598. for (auto *S : ToForget)
  11599. forgetMemoizedResultsImpl(S);
  11600. for (auto I = PredicatedSCEVRewrites.begin();
  11601. I != PredicatedSCEVRewrites.end();) {
  11602. std::pair<const SCEV *, const Loop *> Entry = I->first;
  11603. if (ToForget.count(Entry.first))
  11604. PredicatedSCEVRewrites.erase(I++);
  11605. else
  11606. ++I;
  11607. }
  11608. }
  11609. void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
  11610. LoopDispositions.erase(S);
  11611. BlockDispositions.erase(S);
  11612. UnsignedRanges.erase(S);
  11613. SignedRanges.erase(S);
  11614. HasRecMap.erase(S);
  11615. MinTrailingZerosCache.erase(S);
  11616. auto ExprIt = ExprValueMap.find(S);
  11617. if (ExprIt != ExprValueMap.end()) {
  11618. for (auto &ValueAndOffset : ExprIt->second) {
  11619. if (ValueAndOffset.second == nullptr) {
  11620. auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first);
  11621. if (ValueIt != ValueExprMap.end())
  11622. ValueExprMap.erase(ValueIt);
  11623. }
  11624. }
  11625. ExprValueMap.erase(ExprIt);
  11626. }
  11627. auto ScopeIt = ValuesAtScopes.find(S);
  11628. if (ScopeIt != ValuesAtScopes.end()) {
  11629. for (const auto &Pair : ScopeIt->second)
  11630. if (!isa_and_nonnull<SCEVConstant>(Pair.second))
  11631. erase_value(ValuesAtScopesUsers[Pair.second],
  11632. std::make_pair(Pair.first, S));
  11633. ValuesAtScopes.erase(ScopeIt);
  11634. }
  11635. auto ScopeUserIt = ValuesAtScopesUsers.find(S);
  11636. if (ScopeUserIt != ValuesAtScopesUsers.end()) {
  11637. for (const auto &Pair : ScopeUserIt->second)
  11638. erase_value(ValuesAtScopes[Pair.second], std::make_pair(Pair.first, S));
  11639. ValuesAtScopesUsers.erase(ScopeUserIt);
  11640. }
  11641. auto BEUsersIt = BECountUsers.find(S);
  11642. if (BEUsersIt != BECountUsers.end()) {
  11643. // Work on a copy, as forgetBackedgeTakenCounts() will modify the original.
  11644. auto Copy = BEUsersIt->second;
  11645. for (const auto &Pair : Copy)
  11646. forgetBackedgeTakenCounts(Pair.getPointer(), Pair.getInt());
  11647. BECountUsers.erase(BEUsersIt);
  11648. }
  11649. }
  11650. void
  11651. ScalarEvolution::getUsedLoops(const SCEV *S,
  11652. SmallPtrSetImpl<const Loop *> &LoopsUsed) {
  11653. struct FindUsedLoops {
  11654. FindUsedLoops(SmallPtrSetImpl<const Loop *> &LoopsUsed)
  11655. : LoopsUsed(LoopsUsed) {}
  11656. SmallPtrSetImpl<const Loop *> &LoopsUsed;
  11657. bool follow(const SCEV *S) {
  11658. if (auto *AR = dyn_cast<SCEVAddRecExpr>(S))
  11659. LoopsUsed.insert(AR->getLoop());
  11660. return true;
  11661. }
  11662. bool isDone() const { return false; }
  11663. };
  11664. FindUsedLoops F(LoopsUsed);
  11665. SCEVTraversal<FindUsedLoops>(F).visitAll(S);
  11666. }
  11667. void ScalarEvolution::verify() const {
  11668. ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
  11669. ScalarEvolution SE2(F, TLI, AC, DT, LI);
  11670. SmallVector<Loop *, 8> LoopStack(LI.begin(), LI.end());
11671. // Maps SCEV expressions from one ScalarEvolution "universe" to another.
  11672. struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> {
  11673. SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {}
  11674. const SCEV *visitConstant(const SCEVConstant *Constant) {
  11675. return SE.getConstant(Constant->getAPInt());
  11676. }
  11677. const SCEV *visitUnknown(const SCEVUnknown *Expr) {
  11678. return SE.getUnknown(Expr->getValue());
  11679. }
  11680. const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
  11681. return SE.getCouldNotCompute();
  11682. }
  11683. };
  11684. SCEVMapper SCM(SE2);
  11685. while (!LoopStack.empty()) {
  11686. auto *L = LoopStack.pop_back_val();
  11687. llvm::append_range(LoopStack, *L);
  11688. auto *CurBECount = SCM.visit(
  11689. const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L));
  11690. auto *NewBECount = SE2.getBackedgeTakenCount(L);
  11691. if (CurBECount == SE2.getCouldNotCompute() ||
  11692. NewBECount == SE2.getCouldNotCompute()) {
  11693. // NB! This situation is legal, but is very suspicious -- whatever pass
11694. // changed the loop to make a trip count go from could-not-compute to
  11695. // computable or vice-versa *should have* invalidated SCEV. However, we
  11696. // choose not to assert here (for now) since we don't want false
  11697. // positives.
  11698. continue;
  11699. }
  11700. if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) {
  11701. // SCEV treats "undef" as an unknown but consistent value (i.e. it does
  11702. // not propagate undef aggressively). This means we can (and do) fail
  11703. // verification in cases where a transform makes the trip count of a loop
  11704. // go from "undef" to "undef+1" (say). The transform is fine, since in
  11705. // both cases the loop iterates "undef" times, but SCEV thinks we
  11706. // increased the trip count of the loop by 1 incorrectly.
  11707. continue;
  11708. }
  11709. if (SE.getTypeSizeInBits(CurBECount->getType()) >
  11710. SE.getTypeSizeInBits(NewBECount->getType()))
  11711. NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType());
  11712. else if (SE.getTypeSizeInBits(CurBECount->getType()) <
  11713. SE.getTypeSizeInBits(NewBECount->getType()))
  11714. CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());
  11715. const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount);
  11716. // Unless VerifySCEVStrict is set, we only compare constant deltas.
  11717. if ((VerifySCEVStrict || isa<SCEVConstant>(Delta)) && !Delta->isZero()) {
  11718. dbgs() << "Trip Count for " << *L << " Changed!\n";
  11719. dbgs() << "Old: " << *CurBECount << "\n";
  11720. dbgs() << "New: " << *NewBECount << "\n";
  11721. dbgs() << "Delta: " << *Delta << "\n";
  11722. std::abort();
  11723. }
  11724. }
  11725. // Collect all valid loops currently in LoopInfo.
  11726. SmallPtrSet<Loop *, 32> ValidLoops;
  11727. SmallVector<Loop *, 32> Worklist(LI.begin(), LI.end());
  11728. while (!Worklist.empty()) {
  11729. Loop *L = Worklist.pop_back_val();
  11730. if (ValidLoops.contains(L))
  11731. continue;
  11732. ValidLoops.insert(L);
  11733. Worklist.append(L->begin(), L->end());
  11734. }
  11735. for (auto &KV : ValueExprMap) {
  11736. #ifndef NDEBUG
  11737. // Check for SCEV expressions referencing invalid/deleted loops.
  11738. if (auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) {
  11739. assert(ValidLoops.contains(AR->getLoop()) &&
  11740. "AddRec references invalid loop");
  11741. }
  11742. #endif
  11743. // Check that the value is also part of the reverse map.
  11744. auto It = ExprValueMap.find(KV.second);
  11745. if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) {
  11746. dbgs() << "Value " << *KV.first
  11747. << " is in ValueExprMap but not in ExprValueMap\n";
  11748. std::abort();
  11749. }
  11750. }
  11751. for (const auto &KV : ExprValueMap) {
  11752. for (const auto &ValueAndOffset : KV.second) {
  11753. if (ValueAndOffset.second != nullptr)
  11754. continue;
  11755. auto It = ValueExprMap.find_as(ValueAndOffset.first);
  11756. if (It == ValueExprMap.end()) {
  11757. dbgs() << "Value " << *ValueAndOffset.first
  11758. << " is in ExprValueMap but not in ValueExprMap\n";
  11759. std::abort();
  11760. }
  11761. if (It->second != KV.first) {
  11762. dbgs() << "Value " << *ValueAndOffset.first
  11763. << " mapped to " << *It->second
  11764. << " rather than " << *KV.first << "\n";
  11765. std::abort();
  11766. }
  11767. }
  11768. }
  11769. // Verify integrity of SCEV users.
  11770. for (const auto &S : UniqueSCEVs) {
  11771. SmallVector<const SCEV *, 4> Ops;
  11772. collectUniqueOps(&S, Ops);
  11773. for (const auto *Op : Ops) {
  11774. // We do not store dependencies of constants.
  11775. if (isa<SCEVConstant>(Op))
  11776. continue;
  11777. auto It = SCEVUsers.find(Op);
  11778. if (It != SCEVUsers.end() && It->second.count(&S))
  11779. continue;
  11780. dbgs() << "Use of operand " << *Op << " by user " << S
  11781. << " is not being tracked!\n";
  11782. std::abort();
  11783. }
  11784. }
  11785. // Verify integrity of ValuesAtScopes users.
  11786. for (const auto &ValueAndVec : ValuesAtScopes) {
  11787. const SCEV *Value = ValueAndVec.first;
  11788. for (const auto &LoopAndValueAtScope : ValueAndVec.second) {
  11789. const Loop *L = LoopAndValueAtScope.first;
  11790. const SCEV *ValueAtScope = LoopAndValueAtScope.second;
  11791. if (!isa<SCEVConstant>(ValueAtScope)) {
  11792. auto It = ValuesAtScopesUsers.find(ValueAtScope);
  11793. if (It != ValuesAtScopesUsers.end() &&
  11794. is_contained(It->second, std::make_pair(L, Value)))
  11795. continue;
  11796. dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: "
  11797. << *ValueAtScope << " missing in ValuesAtScopesUsers\n";
  11798. std::abort();
  11799. }
  11800. }
  11801. }
  11802. for (const auto &ValueAtScopeAndVec : ValuesAtScopesUsers) {
  11803. const SCEV *ValueAtScope = ValueAtScopeAndVec.first;
  11804. for (const auto &LoopAndValue : ValueAtScopeAndVec.second) {
  11805. const Loop *L = LoopAndValue.first;
  11806. const SCEV *Value = LoopAndValue.second;
  11807. assert(!isa<SCEVConstant>(Value));
  11808. auto It = ValuesAtScopes.find(Value);
  11809. if (It != ValuesAtScopes.end() &&
  11810. is_contained(It->second, std::make_pair(L, ValueAtScope)))
  11811. continue;
  11812. dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: "
  11813. << *ValueAtScope << " missing in ValuesAtScopes\n";
  11814. std::abort();
  11815. }
  11816. }
  11817. // Verify integrity of BECountUsers.
  11818. auto VerifyBECountUsers = [&](bool Predicated) {
  11819. auto &BECounts =
  11820. Predicated ? PredicatedBackedgeTakenCounts : BackedgeTakenCounts;
  11821. for (const auto &LoopAndBEInfo : BECounts) {
  11822. for (const ExitNotTakenInfo &ENT : LoopAndBEInfo.second.ExitNotTaken) {
  11823. if (!isa<SCEVConstant>(ENT.ExactNotTaken)) {
  11824. auto UserIt = BECountUsers.find(ENT.ExactNotTaken);
  11825. if (UserIt != BECountUsers.end() &&
  11826. UserIt->second.contains({ LoopAndBEInfo.first, Predicated }))
  11827. continue;
  11828. dbgs() << "Value " << *ENT.ExactNotTaken << " for loop "
  11829. << *LoopAndBEInfo.first << " missing from BECountUsers\n";
  11830. std::abort();
  11831. }
  11832. }
  11833. }
  11834. };
  11835. VerifyBECountUsers(/* Predicated */ false);
  11836. VerifyBECountUsers(/* Predicated */ true);
  11837. }
  11838. bool ScalarEvolution::invalidate(
  11839. Function &F, const PreservedAnalyses &PA,
  11840. FunctionAnalysisManager::Invalidator &Inv) {
  11841. // Invalidate the ScalarEvolution object whenever it isn't preserved or one
  11842. // of its dependencies is invalidated.
  11843. auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
  11844. return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
  11845. Inv.invalidate<AssumptionAnalysis>(F, PA) ||
  11846. Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
  11847. Inv.invalidate<LoopAnalysis>(F, PA);
  11848. }
  11849. AnalysisKey ScalarEvolutionAnalysis::Key;
  11850. ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
  11851. FunctionAnalysisManager &AM) {
  11852. return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F),
  11853. AM.getResult<AssumptionAnalysis>(F),
  11854. AM.getResult<DominatorTreeAnalysis>(F),
  11855. AM.getResult<LoopAnalysis>(F));
  11856. }
  11857. PreservedAnalyses
  11858. ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) {
  11859. AM.getResult<ScalarEvolutionAnalysis>(F).verify();
  11860. return PreservedAnalyses::all();
  11861. }
  11862. PreservedAnalyses
  11863. ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
  11864. // For compatibility with opt's -analyze feature under legacy pass manager
  11865. // which was not ported to NPM. This keeps tests using
  11866. // update_analyze_test_checks.py working.
  11867. OS << "Printing analysis 'Scalar Evolution Analysis' for function '"
  11868. << F.getName() << "':\n";
  11869. AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
  11870. return PreservedAnalyses::all();
  11871. }
  11872. INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
  11873. "Scalar Evolution Analysis", false, true)
  11874. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  11875. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  11876. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  11877. INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
  11878. INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
  11879. "Scalar Evolution Analysis", false, true)
  11880. char ScalarEvolutionWrapperPass::ID = 0;
  11881. ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
  11882. initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
  11883. }
  11884. bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
  11885. SE.reset(new ScalarEvolution(
  11886. F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
  11887. getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
  11888. getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
  11889. getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
  11890. return false;
  11891. }
  11892. void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }
  11893. void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
  11894. SE->print(OS);
  11895. }
  11896. void ScalarEvolutionWrapperPass::verifyAnalysis() const {
  11897. if (!VerifySCEV)
  11898. return;
  11899. SE->verify();
  11900. }
  11901. void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  11902. AU.setPreservesAll();
  11903. AU.addRequiredTransitive<AssumptionCacheTracker>();
  11904. AU.addRequiredTransitive<LoopInfoWrapperPass>();
  11905. AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  11906. AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
  11907. }
  11908. const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS,
  11909. const SCEV *RHS) {
  11910. FoldingSetNodeID ID;
  11911. assert(LHS->getType() == RHS->getType() &&
  11912. "Type mismatch between LHS and RHS");
  11913. // Unique this node based on the arguments
  11914. ID.AddInteger(SCEVPredicate::P_Equal);
  11915. ID.AddPointer(LHS);
  11916. ID.AddPointer(RHS);
  11917. void *IP = nullptr;
  11918. if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
  11919. return S;
  11920. SCEVEqualPredicate *Eq = new (SCEVAllocator)
  11921. SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
  11922. UniquePreds.InsertNode(Eq, IP);
  11923. return Eq;
  11924. }
  11925. const SCEVPredicate *ScalarEvolution::getWrapPredicate(
  11926. const SCEVAddRecExpr *AR,
  11927. SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
  11928. FoldingSetNodeID ID;
  11929. // Unique this node based on the arguments
  11930. ID.AddInteger(SCEVPredicate::P_Wrap);
  11931. ID.AddPointer(AR);
  11932. ID.AddInteger(AddedFlags);
  11933. void *IP = nullptr;
  11934. if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
  11935. return S;
  11936. auto *OF = new (SCEVAllocator)
  11937. SCEVWrapPredicate(ID.Intern(SCEVAllocator), AR, AddedFlags);
  11938. UniquePreds.InsertNode(OF, IP);
  11939. return OF;
  11940. }
  11941. namespace {
  11942. class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
  11943. public:
  11944. /// Rewrites \p S in the context of a loop L and the SCEV predication
  11945. /// infrastructure.
  11946. ///
  11947. /// If \p Pred is non-null, the SCEV expression is rewritten to respect the
  11948. /// equivalences present in \p Pred.
  11949. ///
  11950. /// If \p NewPreds is non-null, rewrite is free to add further predicates to
  11951. /// \p NewPreds such that the result will be an AddRecExpr.
  11952. static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
  11953. SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
  11954. SCEVUnionPredicate *Pred) {
  11955. SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
  11956. return Rewriter.visit(S);
  11957. }
  11958. const SCEV *visitUnknown(const SCEVUnknown *Expr) {
  11959. if (Pred) {
  11960. auto ExprPreds = Pred->getPredicatesForExpr(Expr);
  11961. for (auto *Pred : ExprPreds)
  11962. if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
  11963. if (IPred->getLHS() == Expr)
  11964. return IPred->getRHS();
  11965. }
  11966. return convertToAddRecWithPreds(Expr);
  11967. }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
    if (AR && AR->getLoop() == L && AR->isAffine()) {
      // This couldn't be folded because the operand didn't have the nuw
      // flag. Add the nusw flag as an assumption that we could make.
      const SCEV *Step = AR->getStepRecurrence(SE);
      Type *Ty = Expr->getType();
      if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNUSW))
        return SE.getAddRecExpr(SE.getZeroExtendExpr(AR->getStart(), Ty),
                                SE.getSignExtendExpr(Step, Ty), L,
                                AR->getNoWrapFlags());
    }
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }
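
  // Illustrative example for the zero-extend rewrite above (not exercised
  // here): for an affine AddRec AR = {%start,+,%step}<%L> of type i32, a
  // (zext AR to i64) that could not be folded statically is rewritten, under
  // an added <nusw> wrap predicate on AR, to
  //   {(zext i32 %start to i64),+,(sext i32 %step to i64)}<%L>.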

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
    if (AR && AR->getLoop() == L && AR->isAffine()) {
      // This couldn't be folded because the operand didn't have the nsw
      // flag. Add the nssw flag as an assumption that we could make.
      const SCEV *Step = AR->getStepRecurrence(SE);
      Type *Ty = Expr->getType();
      if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNSSW))
        return SE.getAddRecExpr(SE.getSignExtendExpr(AR->getStart(), Ty),
                                SE.getSignExtendExpr(Step, Ty), L,
                                AR->getNoWrapFlags());
    }
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

private:
  explicit SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
                                 SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
                                 SCEVUnionPredicate *Pred)
      : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}

  bool addOverflowAssumption(const SCEVPredicate *P) {
    if (!NewPreds) {
      // Check if we've already made this assumption.
      return Pred && Pred->implies(P);
    }
    NewPreds->insert(P);
    return true;
  }

  bool addOverflowAssumption(const SCEVAddRecExpr *AR,
                             SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
    auto *A = SE.getWrapPredicate(AR, AddedFlags);
    return addOverflowAssumption(A);
  }

  // If \p Expr represents a PHINode, we try to see if it can be represented
  // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
  // to add this predicate as a runtime overflow check, we return the AddRec.
  // If \p Expr does not meet these conditions (is not a PHI node, or we
  // couldn't create an AddRec for it, or couldn't add the predicate), we just
  // return \p Expr.
  const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
    if (!isa<PHINode>(Expr->getValue()))
      return Expr;
    Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
        PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr);
    if (!PredicatedRewrite)
      return Expr;
    for (auto *P : PredicatedRewrite->second) {
      // Wrap predicates from outer loops are not supported.
      if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) {
        auto *AR = cast<const SCEVAddRecExpr>(WP->getExpr());
        if (L != AR->getLoop())
          return Expr;
      }
      if (!addOverflowAssumption(P))
        return Expr;
    }
    return PredicatedRewrite->first;
  }

  SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
  SCEVUnionPredicate *Pred;
  const Loop *L;
};

} // end anonymous namespace

const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
                                                   SCEVUnionPredicate &Preds) {
  return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds);
}

const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
    const SCEV *S, const Loop *L,
    SmallPtrSetImpl<const SCEVPredicate *> &Preds) {
  SmallPtrSet<const SCEVPredicate *, 4> TransformPreds;
  S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr);
  auto *AddRec = dyn_cast<SCEVAddRecExpr>(S);
  if (!AddRec)
    return nullptr;
  // Since the transformation was successful, we can now transfer the SCEV
  // predicates.
  for (auto *P : TransformPreds)
    Preds.insert(P);
  return AddRec;
}
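
// Usage sketch for convertSCEVToAddRecWithPredicates (illustrative only;
// clients normally go through PredicatedScalarEvolution further below):
//   SmallPtrSet<const SCEVPredicate *, 4> Preds;
//   if (const auto *AR = SE.convertSCEVToAddRecWithPredicates(S, L, Preds)) {
//     // AR is an add recurrence that is only valid if every predicate now
//     // in Preds holds, e.g. after emitting them as runtime guards.
//   }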

/// SCEV predicates
SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
                             SCEVPredicateKind Kind)
    : FastID(ID), Kind(Kind) {}

SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
                                       const SCEV *LHS, const SCEV *RHS)
    : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {
  assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match");
  assert(LHS != RHS && "LHS and RHS are the same SCEV");
}

bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
  const auto *Op = dyn_cast<SCEVEqualPredicate>(N);
  if (!Op)
    return false;
  return Op->LHS == LHS && Op->RHS == RHS;
}

bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }

const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }

void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
}

SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
                                     const SCEVAddRecExpr *AR,
                                     IncrementWrapFlags Flags)
    : SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {}

const SCEV *SCEVWrapPredicate::getExpr() const { return AR; }

bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const {
  const auto *Op = dyn_cast<SCEVWrapPredicate>(N);
  return Op && Op->AR == AR && setFlags(Flags, Op->Flags) == Flags;
}

bool SCEVWrapPredicate::isAlwaysTrue() const {
  SCEV::NoWrapFlags ScevFlags = AR->getNoWrapFlags();
  IncrementWrapFlags IFlags = Flags;
  if (ScalarEvolution::setFlags(ScevFlags, SCEV::FlagNSW) == ScevFlags)
    IFlags = clearFlags(IFlags, IncrementNSSW);
  return IFlags == IncrementAnyWrap;
}

void SCEVWrapPredicate::print(raw_ostream &OS, unsigned Depth) const {
  OS.indent(Depth) << *getExpr() << " Added Flags: ";
  if (SCEVWrapPredicate::IncrementNUSW & getFlags())
    OS << "<nusw>";
  if (SCEVWrapPredicate::IncrementNSSW & getFlags())
    OS << "<nssw>";
  OS << "\n";
}

SCEVWrapPredicate::IncrementWrapFlags
SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR,
                                   ScalarEvolution &SE) {
  IncrementWrapFlags ImpliedFlags = IncrementAnyWrap;
  SCEV::NoWrapFlags StaticFlags = AR->getNoWrapFlags();
  // We can safely transfer the NSW flag as NSSW.
  if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNSW) == StaticFlags)
    ImpliedFlags = IncrementNSSW;
  if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNUW) == StaticFlags) {
    // If the increment is non-negative, the SCEV NUW flag will also imply the
    // WrapPredicate NUSW flag.
    if (const auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE)))
      if (Step->getValue()->getValue().isNonNegative())
        ImpliedFlags = setFlags(ImpliedFlags, IncrementNUSW);
  }
  return ImpliedFlags;
}
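
// Illustrative example for getImpliedFlags above: for {0,+,1}<nuw><nsw><%L>
// the static NSW flag implies IncrementNSSW, and because the step (1) is a
// non-negative constant the NUW flag additionally implies IncrementNUSW, so
// neither assumption needs a runtime check.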

/// Union predicates don't get cached, so create a dummy set ID for them.
SCEVUnionPredicate::SCEVUnionPredicate()
    : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}

bool SCEVUnionPredicate::isAlwaysTrue() const {
  return all_of(Preds,
                [](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
}

ArrayRef<const SCEVPredicate *>
SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) {
  auto I = SCEVToPreds.find(Expr);
  if (I == SCEVToPreds.end())
    return ArrayRef<const SCEVPredicate *>();
  return I->second;
}

bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
  if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N))
    return all_of(Set->Preds,
                  [this](const SCEVPredicate *I) { return this->implies(I); });
  auto ScevPredsIt = SCEVToPreds.find(N->getExpr());
  if (ScevPredsIt == SCEVToPreds.end())
    return false;
  auto &SCEVPreds = ScevPredsIt->second;
  return any_of(SCEVPreds,
                [N](const SCEVPredicate *I) { return I->implies(N); });
}

const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }

void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
  for (auto Pred : Preds)
    Pred->print(OS, Depth);
}

void SCEVUnionPredicate::add(const SCEVPredicate *N) {
  if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) {
    for (auto Pred : Set->Preds)
      add(Pred);
    return;
  }
  if (implies(N))
    return;
  const SCEV *Key = N->getExpr();
  assert(Key && "Only SCEVUnionPredicate doesn't have an "
                "associated expression!");
  SCEVToPreds[Key].push_back(N);
  Preds.push_back(N);
}

PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
                                                     Loop &L)
    : SE(SE), L(L) {}

void ScalarEvolution::registerUser(const SCEV *User,
                                   ArrayRef<const SCEV *> Ops) {
  for (auto *Op : Ops)
    // We do not expect that forgetting cached data for SCEVConstants will ever
    // open any prospects for sharpening or introduce any correctness issues,
    // so we don't bother storing their dependencies.
    if (!isa<SCEVConstant>(Op))
      SCEVUsers[Op].insert(User);
}

const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
  const SCEV *Expr = SE.getSCEV(V);
  RewriteEntry &Entry = RewriteMap[Expr];
  // If we already have an entry and the version matches, return it.
  if (Entry.second && Generation == Entry.first)
    return Entry.second;
  // We found an entry but it's stale. Rewrite the stale entry
  // according to the current predicate.
  if (Entry.second)
    Expr = Entry.second;
  const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds);
  Entry = {Generation, NewSCEV};
  return NewSCEV;
}
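
// Note on the caching scheme above (descriptive, not normative): each cached
// rewrite is stamped with the Generation at which it was produced.
// addPredicate() bumps Generation, so the next getSCEV() call notices the
// stale stamp and rewrites the previously cached expression under the
// enlarged predicate set instead of starting from the original SCEV.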

const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() {
  if (!BackedgeCount) {
    SCEVUnionPredicate BackedgePred;
    BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred);
    addPredicate(BackedgePred);
  }
  return BackedgeCount;
}

void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
  if (Preds.implies(&Pred))
    return;
  Preds.add(&Pred);
  updateGeneration();
}

const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
  return Preds;
}

void PredicatedScalarEvolution::updateGeneration() {
  // If the generation number wrapped, recompute everything.
  if (++Generation == 0) {
    for (auto &II : RewriteMap) {
      const SCEV *Rewritten = II.second.second;
      II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)};
    }
  }
}

void PredicatedScalarEvolution::setNoOverflow(
    Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) {
  const SCEV *Expr = getSCEV(V);
  const auto *AR = cast<SCEVAddRecExpr>(Expr);
  auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE);
  // Clear the statically implied flags.
  Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags);
  addPredicate(*SE.getWrapPredicate(AR, Flags));
  auto II = FlagsMap.insert({V, Flags});
  if (!II.second)
    II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second);
}

bool PredicatedScalarEvolution::hasNoOverflow(
    Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) {
  const SCEV *Expr = getSCEV(V);
  const auto *AR = cast<SCEVAddRecExpr>(Expr);
  Flags = SCEVWrapPredicate::clearFlags(
      Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE));
  auto II = FlagsMap.find(V);
  if (II != FlagsMap.end())
    Flags = SCEVWrapPredicate::clearFlags(Flags, II->second);
  return Flags == SCEVWrapPredicate::IncrementAnyWrap;
}
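
// Usage sketch for the two functions above (illustrative only; IV is a
// hypothetical Value whose SCEV is an add recurrence in this loop): a client
// that must assume IV does not wrap in the unsigned sense could write
//   PSE.setNoOverflow(IV, SCEVWrapPredicate::IncrementNUSW);
// and later check whether the assumption is covered, either statically or by
// a previously recorded predicate, with
//   PSE.hasNoOverflow(IV, SCEVWrapPredicate::IncrementNUSW);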

const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) {
  const SCEV *Expr = this->getSCEV(V);
  SmallPtrSet<const SCEVPredicate *, 4> NewPreds;
  auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds);
  if (!New)
    return nullptr;
  for (auto *P : NewPreds)
    Preds.add(P);
  updateGeneration();
  RewriteMap[SE.getSCEV(V)] = {Generation, New};
  return New;
}

PredicatedScalarEvolution::PredicatedScalarEvolution(
    const PredicatedScalarEvolution &Init)
    : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
      Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
  for (auto I : Init.FlagsMap)
    FlagsMap.insert(I);
}

void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
  // For each block.
  for (auto *BB : L.getBlocks())
    for (auto &I : *BB) {
      if (!SE.isSCEVable(I.getType()))
        continue;
      auto *Expr = SE.getSCEV(&I);
      auto II = RewriteMap.find(Expr);
      if (II == RewriteMap.end())
        continue;
      // Don't print things that are not interesting.
      if (II->second.second == Expr)
        continue;
      OS.indent(Depth) << "[PSE]" << I << ":\n";
      OS.indent(Depth + 2) << *Expr << "\n";
      OS.indent(Depth + 2) << "--> " << *II->second.second << "\n";
    }
}

// Match the mathematical pattern A - (A / B) * B, where A and B can be
// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used
// for URem with constant power-of-2 second operands.
// It's not always easy, as A and B can be folded (e.g. if A is X / 2 and B is
// 4, then A / B becomes X / 8).
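// Illustrative example of the power-of-2 case: (%a urem 8) on i32 is
// canonicalized to (zext i3 (trunc i32 %a to i3) to i32); the first case
// below recovers LHS = %a and RHS = 8 (i.e. 1 << 3) from that form.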
bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
                                const SCEV *&RHS) {
  // Try to match 'zext (trunc A to iB) to iY', which is used
  // for URem with constant power-of-2 second operands. Make sure the size of
  // the operand A matches the size of the whole expression.
  if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr))
    if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) {
      LHS = Trunc->getOperand();
      // Bail out if the type of the LHS is larger than the type of the
      // expression for now.
      if (getTypeSizeInBits(LHS->getType()) >
          getTypeSizeInBits(Expr->getType()))
        return false;
      if (LHS->getType() != Expr->getType())
        LHS = getZeroExtendExpr(LHS, Expr->getType());
      RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1)
                        << getTypeSizeInBits(Trunc->getType()));
      return true;
    }
  const auto *Add = dyn_cast<SCEVAddExpr>(Expr);
  if (Add == nullptr || Add->getNumOperands() != 2)
    return false;
  const SCEV *A = Add->getOperand(1);
  const auto *Mul = dyn_cast<SCEVMulExpr>(Add->getOperand(0));
  if (Mul == nullptr)
    return false;
  const auto MatchURemWithDivisor = [&](const SCEV *B) {
    // (SomeExpr + (-(SomeExpr / B) * B)).
    if (Expr == getURemExpr(A, B)) {
      LHS = A;
      RHS = B;
      return true;
    }
    return false;
  };
  // (SomeExpr + (-1 * (SomeExpr / B) * B)).
  if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0)))
    return MatchURemWithDivisor(Mul->getOperand(1)) ||
           MatchURemWithDivisor(Mul->getOperand(2));
  // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)).
  if (Mul->getNumOperands() == 2)
    return MatchURemWithDivisor(Mul->getOperand(1)) ||
           MatchURemWithDivisor(Mul->getOperand(0)) ||
           MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) ||
           MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0)));
  return false;
}

const SCEV *
ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  // Form an expression for the maximum exit count possible for this loop. We
  // merge the max and exact information to approximate a version of
  // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
  SmallVector<const SCEV*, 4> ExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      ExitCount = getExitCount(L, ExitingBB,
                               ScalarEvolution::ConstantMaximum);
    if (!isa<SCEVCouldNotCompute>(ExitCount)) {
      assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
             "We should only have known counts for exiting blocks that "
             "dominate latch!");
      ExitCounts.push_back(ExitCount);
    }
  }
  if (ExitCounts.empty())
    return getCouldNotCompute();
  return getUMinFromMismatchedTypes(ExitCounts);
}
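
// Illustrative example for the function above: for a loop with two exiting
// blocks whose (exact or constant-max) exit counts are %n and 100, the
// symbolic maximum backedge-taken count is (umin %n, 100), with the types
// unified first if they differ.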

/// A rewriter to replace SCEV expressions in Map with the corresponding entry
/// in the map. It skips AddRecExpr because we cannot guarantee that the
/// replacement is loop invariant in the loop of the AddRec.
///
/// At the moment only rewriting SCEVUnknown and SCEVZeroExtendExpr is
/// supported.
class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
  const DenseMap<const SCEV *, const SCEV *> &Map;

public:
  SCEVLoopGuardRewriter(ScalarEvolution &SE,
                        DenseMap<const SCEV *, const SCEV *> &M)
      : SCEVRewriteVisitor(SE), Map(M) {}

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    auto I = Map.find(Expr);
    if (I == Map.end())
      return Expr;
    return I->second;
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    auto I = Map.find(Expr);
    if (I == Map.end())
      return SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visitZeroExtendExpr(
          Expr);
    return I->second;
  }
};

const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
  SmallVector<const SCEV *> ExprsToRewrite;
  auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
                              const SCEV *RHS,
                              DenseMap<const SCEV *, const SCEV *>
                                  &RewriteMap) {
    // WARNING: It is generally unsound to apply any wrap flags to the proposed
    // replacement SCEV which isn't directly implied by the structure of that
    // SCEV. In particular, using contextual facts to imply flags is *NOT*
    // legal. See the scoping rules for flags in the header to understand why.

    // If LHS is a constant, apply information to the other expression.
    if (isa<SCEVConstant>(LHS)) {
      std::swap(LHS, RHS);
      Predicate = CmpInst::getSwappedPredicate(Predicate);
    }

    // Check for a condition of the form (-C1 + X < C2). InstCombine will
    // create this form when combining two checks of the form (X u< C2 + C1)
    // and (X >=u C1).
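    // Illustrative example: the pair of guards (X u>= 4) and (X u< 20) is
    // combined into ((-4 + X) u< 16); the exact region for that compare,
    // shifted back by 4, is [4, 20), so X gets rewritten below to
    // (umax 4, (umin X, 19)).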
    auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap,
                                 &ExprsToRewrite]() {
      auto *AddExpr = dyn_cast<SCEVAddExpr>(LHS);
      if (!AddExpr || AddExpr->getNumOperands() != 2)
        return false;
      auto *C1 = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
      auto *LHSUnknown = dyn_cast<SCEVUnknown>(AddExpr->getOperand(1));
      auto *C2 = dyn_cast<SCEVConstant>(RHS);
      if (!C1 || !C2 || !LHSUnknown)
        return false;
      auto ExactRegion =
          ConstantRange::makeExactICmpRegion(Predicate, C2->getAPInt())
              .sub(C1->getAPInt());
      // Bail out, unless we have a non-wrapping, monotonic range.
      if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet())
        return false;
      auto I = RewriteMap.find(LHSUnknown);
      const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHSUnknown;
      RewriteMap[LHSUnknown] = getUMaxExpr(
          getConstant(ExactRegion.getUnsignedMin()),
          getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUnsignedMax())));
      ExprsToRewrite.push_back(LHSUnknown);
      return true;
    };
    if (MatchRangeCheckIdiom())
      return;

    // If we have LHS == 0, check if LHS is computing a property of some
    // unknown SCEV %v which we can rewrite %v to express explicitly.
    const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
    if (Predicate == CmpInst::ICMP_EQ && RHSC &&
        RHSC->getValue()->isNullValue()) {
      // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
      // explicitly express that.
      const SCEV *URemLHS = nullptr;
      const SCEV *URemRHS = nullptr;
      if (matchURem(LHS, URemLHS, URemRHS)) {
        if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
          auto Multiple = getMulExpr(getUDivExpr(URemLHS, URemRHS), URemRHS);
          RewriteMap[LHSUnknown] = Multiple;
          ExprsToRewrite.push_back(LHSUnknown);
          return;
        }
      }
    }

    // Do not apply information for constants or if RHS contains an AddRec.
    if (isa<SCEVConstant>(LHS) || containsAddRecurrence(RHS))
      return;

    // If RHS is SCEVUnknown, make sure the information is applied to it.
    if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) {
      std::swap(LHS, RHS);
      Predicate = CmpInst::getSwappedPredicate(Predicate);
    }

    // Limit to expressions that can be rewritten.
    if (!isa<SCEVUnknown>(LHS) && !isa<SCEVZeroExtendExpr>(LHS))
      return;

    // Check whether LHS has already been rewritten. In that case we want to
    // chain further rewrites onto the already rewritten value.
    auto I = RewriteMap.find(LHS);
    const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;

    const SCEV *RewrittenRHS = nullptr;
    switch (Predicate) {
    case CmpInst::ICMP_ULT:
      RewrittenRHS =
          getUMinExpr(RewrittenLHS, getMinusSCEV(RHS, getOne(RHS->getType())));
      break;
    case CmpInst::ICMP_SLT:
      RewrittenRHS =
          getSMinExpr(RewrittenLHS, getMinusSCEV(RHS, getOne(RHS->getType())));
      break;
    case CmpInst::ICMP_ULE:
      RewrittenRHS = getUMinExpr(RewrittenLHS, RHS);
      break;
    case CmpInst::ICMP_SLE:
      RewrittenRHS = getSMinExpr(RewrittenLHS, RHS);
      break;
    case CmpInst::ICMP_UGT:
      RewrittenRHS =
          getUMaxExpr(RewrittenLHS, getAddExpr(RHS, getOne(RHS->getType())));
      break;
    case CmpInst::ICMP_SGT:
      RewrittenRHS =
          getSMaxExpr(RewrittenLHS, getAddExpr(RHS, getOne(RHS->getType())));
      break;
    case CmpInst::ICMP_UGE:
      RewrittenRHS = getUMaxExpr(RewrittenLHS, RHS);
      break;
    case CmpInst::ICMP_SGE:
      RewrittenRHS = getSMaxExpr(RewrittenLHS, RHS);
      break;
    case CmpInst::ICMP_EQ:
      if (isa<SCEVConstant>(RHS))
        RewrittenRHS = RHS;
      break;
    case CmpInst::ICMP_NE:
      if (isa<SCEVConstant>(RHS) &&
          cast<SCEVConstant>(RHS)->getValue()->isNullValue())
        RewrittenRHS = getUMaxExpr(RewrittenLHS, getOne(RHS->getType()));
      break;
    default:
      break;
    }
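    // Illustrative examples of the mapping above (assuming %x has not been
    // rewritten yet): a guard (%x u< %n) yields %x -> (umin %x, (-1 + %n)),
    // and a guard (%x != 0) yields %x -> (umax %x, 1).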
    if (RewrittenRHS) {
      RewriteMap[LHS] = RewrittenRHS;
      if (LHS == RewrittenLHS)
        ExprsToRewrite.push_back(LHS);
    }
  };

  // First, collect conditions from dominating branches. Starting at the loop
  // predecessor, climb up the predecessor chain, as long as we can find
  // predecessors that have unique successors leading to the original header.
  // TODO: share this logic with isLoopEntryGuardedByCond.
  SmallVector<std::pair<Value *, bool>> Terms;
  for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
           L->getLoopPredecessor(), L->getHeader());
       Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
    const BranchInst *LoopEntryPredicate =
        dyn_cast<BranchInst>(Pair.first->getTerminator());
    if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional())
      continue;
    Terms.emplace_back(LoopEntryPredicate->getCondition(),
                       LoopEntryPredicate->getSuccessor(0) == Pair.second);
  }

  // Now apply the information from the collected conditions to RewriteMap.
  // Conditions are processed in reverse order, so the earliest condition is
  // processed first. This ensures the SCEVs with the shortest dependency chains
  // are constructed first.
  DenseMap<const SCEV *, const SCEV *> RewriteMap;
  for (auto &E : reverse(Terms)) {
    bool EnterIfTrue = E.second;
    SmallVector<Value *, 8> Worklist;
    SmallPtrSet<Value *, 8> Visited;
    Worklist.push_back(E.first);
    while (!Worklist.empty()) {
      Value *Cond = Worklist.pop_back_val();
      if (!Visited.insert(Cond).second)
        continue;
      if (auto *Cmp = dyn_cast<ICmpInst>(Cond)) {
        auto Predicate =
            EnterIfTrue ? Cmp->getPredicate() : Cmp->getInversePredicate();
        CollectCondition(Predicate, getSCEV(Cmp->getOperand(0)),
                         getSCEV(Cmp->getOperand(1)), RewriteMap);
        continue;
      }
      Value *L, *R;
      if (EnterIfTrue ? match(Cond, m_LogicalAnd(m_Value(L), m_Value(R)))
                      : match(Cond, m_LogicalOr(m_Value(L), m_Value(R)))) {
        Worklist.push_back(L);
        Worklist.push_back(R);
      }
    }
  }

  // Also collect information from assumptions dominating the loop.
  for (auto &AssumeVH : AC.assumptions()) {
    if (!AssumeVH)
      continue;
    auto *AssumeI = cast<CallInst>(AssumeVH);
    auto *Cmp = dyn_cast<ICmpInst>(AssumeI->getOperand(0));
    if (!Cmp || !DT.dominates(AssumeI, L->getHeader()))
      continue;
    CollectCondition(Cmp->getPredicate(), getSCEV(Cmp->getOperand(0)),
                     getSCEV(Cmp->getOperand(1)), RewriteMap);
  }

  if (RewriteMap.empty())
    return Expr;

  // Now that all rewrite information is collected, rewrite the collected
  // expressions with the information in the map. This applies information to
  // sub-expressions.
  if (ExprsToRewrite.size() > 1) {
    for (const SCEV *Expr : ExprsToRewrite) {
      const SCEV *RewriteTo = RewriteMap[Expr];
      RewriteMap.erase(Expr);
      SCEVLoopGuardRewriter Rewriter(*this, RewriteMap);
      RewriteMap.insert({Expr, Rewriter.visit(RewriteTo)});
    }
  }
  SCEVLoopGuardRewriter Rewriter(*this, RewriteMap);
  return Rewriter.visit(Expr);
}
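
// Illustrative end-to-end example for applyLoopGuards: with a guard
// (icmp ult i8 %n, 8) dominating the loop and Expr = %n, CollectCondition
// records %n -> (umin %n, 7) in RewriteMap, and the final rewrite returns
// (umin %n, 7), which has a tighter unsigned range than %n itself.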