//===- InstructionCombining.cpp - Combine multiple instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
// instructions. This pass does not modify the CFG. This pass is where
// algebraic simplification happens.
//
// This pass combines things like:
//    %Y = add i32 %X, 1
//    %Z = add i32 %Y, 1
// into:
//    %Z = add i32 %X, 2
//
// This is a simple worklist driven algorithm.
//
// This pass guarantees that the following canonicalizations are performed on
// the program:
//    1. If a binary operator has a constant operand, it is moved to the RHS
//    2. Bitwise operators with constant operands are always grouped so that
//       shifts are performed first, then or's, then and's, then xor's.
//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
//    4. All cmp instructions on boolean values are replaced with logical ops
//    5. add X, X is represented as (X*2) => (X << 1)
//    6. Multiplies with a power-of-two constant argument are transformed into
//       shifts.
//   ... etc.
//
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/InstCombine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumWorklistIterations,
          "Number of instruction combining iterations performed");

STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");
STATISTIC(NumExpand,    "Number of expansions");
STATISTIC(NumFactor   , "Number of factorizations");
STATISTIC(NumReassoc  , "Number of reassociations");
DEBUG_COUNTER(VisitCounter, "instcombine-visit",
              "Controls which instructions are visited");

// FIXME: these limits eventually should be as low as 2.
static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
#ifndef NDEBUG
static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
#else
static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
#endif

static cl::opt<bool>
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
                  cl::init(true));

static cl::opt<unsigned> MaxSinkNumUsers(
    "instcombine-max-sink-users", cl::init(32),
    cl::desc("Maximum number of undroppable users for instruction sinking"));

static cl::opt<unsigned> LimitMaxIterations(
    "instcombine-max-iterations",
    cl::desc("Limit the maximum number of instruction combining iterations"),
    cl::init(InstCombineDefaultMaxIterations));

static cl::opt<unsigned> InfiniteLoopDetectionThreshold(
    "instcombine-infinite-loop-threshold",
    cl::desc("Number of instruction combining iterations considered an "
             "infinite loop"),
    cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden);

static cl::opt<unsigned>
MaxArraySize("instcombine-maxarray-size", cl::init(1024),
             cl::desc("Maximum array size considered when doing a combine"));

// FIXME: Remove this flag when it is no longer necessary to convert
// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
// increases variable availability at the cost of accuracy. Variables that
// cannot be promoted by mem2reg or SROA will be described as living in memory
// for their entire lifetime. However, passes like DSE and instcombine can
// delete stores to the alloca, leading to misleading and inaccurate debug
// information. This flag can be removed when those passes are fixed.
static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
                                               cl::Hidden, cl::init(true));
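/// Ask the target, via TargetTransformInfo, to attempt a target-specific
/// combine of the given intrinsic call.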
std::optional<Instruction *>
InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
  // Handle target specific intrinsics
  if (II.getCalledFunction()->isTargetIntrinsic()) {
    return TTI.instCombineIntrinsic(*this, II);
  }
  return std::nullopt;
}
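/// Ask the target, via TargetTransformInfo, to simplify the demanded bits of
/// the given intrinsic call.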
std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
    IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
    bool &KnownBitsComputed) {
  // Handle target specific intrinsics
  if (II.getCalledFunction()->isTargetIntrinsic()) {
    return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
                                                KnownBitsComputed);
  }
  return std::nullopt;
}
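/// Ask the target, via TargetTransformInfo, to simplify the demanded vector
/// elements of the given intrinsic call.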
std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
    IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
    APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) {
  // Handle target specific intrinsics
  if (II.getCalledFunction()->isTargetIntrinsic()) {
    return TTI.simplifyDemandedVectorEltsIntrinsic(
        *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }
  return std::nullopt;
}
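/// Build an explicit integer expression for the byte offset computed by the
/// given GEP, using the IR builder and data layout.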
Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
  return llvm::emitGEPOffset(&Builder, DL, GEP);
}
/// Legal integers and common types are considered desirable. This is used to
/// avoid creating instructions with types that may not be supported well by
/// the backend.
/// NOTE: This treats i8, i16 and i32 specially because they are common
/// types in frontend languages.
bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
  switch (BitWidth) {
  case 8:
  case 16:
  case 32:
    return true;
  default:
    return DL.isLegalInteger(BitWidth);
  }
}
/// Return true if it is desirable to convert an integer computation from a
/// given bit width to a new bit width.
/// We don't want to convert from a legal or desirable type (like i8) to an
/// illegal type or from a smaller to a larger illegal type. A width of '1'
/// is always treated as a desirable type because i1 is a fundamental type in
/// IR, and there are many specialized optimizations for i1 types.
/// Common/desirable widths are equally treated as legal to convert to, in
/// order to open up more combining opportunities.
bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
                                        unsigned ToWidth) const {
  bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
  bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);

  // Convert to desirable widths even if they are not legal types.
  // Only shrink types, to prevent infinite loops.
  if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
    return true;

  // If this is a legal or desirable integer from type, and the result would be
  // an illegal type, don't do the transformation.
  if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
    return false;

  // Otherwise, if both are illegal, do not increase the size of the result. We
  // do allow things like i160 -> i64, but not i64 -> i160.
  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
    return false;

  return true;
}
/// Return true if it is desirable to convert a computation from 'From' to 'To'.
/// We don't want to convert from a legal to an illegal type or from a smaller
/// to a larger illegal type. i1 is always treated as a legal type because it is
/// a fundamental type in IR, and there are many specialized optimizations for
/// i1 types.
bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
  // TODO: This could be extended to allow vectors. Datalayout changes might be
  // needed to properly support that.
  if (!From->isIntegerTy() || !To->isIntegerTy())
    return false;

  unsigned FromWidth = From->getPrimitiveSizeInBits();
  unsigned ToWidth = To->getPrimitiveSizeInBits();
  return shouldChangeType(FromWidth, ToWidth);
}
// Return true if no-signed-wrap should be maintained for I.
// The nsw flag can be kept if the operation "B (I.getOpcode) C", where both B
// and C are ConstantInts, results in a constant that does not overflow. This
// function only handles the Add and Sub opcodes. For all other opcodes, the
// function conservatively returns false.
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  if (!OBO || !OBO->hasNoSignedWrap())
    return false;

  // We reason about Add and Sub only.
  Instruction::BinaryOps Opcode = I.getOpcode();
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
    return false;

  const APInt *BVal, *CVal;
  if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
    return false;

  bool Overflow = false;
  if (Opcode == Instruction::Add)
    (void)BVal->sadd_ov(*CVal, Overflow);
  else
    (void)BVal->ssub_ov(*CVal, Overflow);

  return !Overflow;
}
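/// Return true if I is an overflowing binary operator with the nuw flag set.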
static bool hasNoUnsignedWrap(BinaryOperator &I) {
  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  return OBO && OBO->hasNoUnsignedWrap();
}
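/// Return true if I is an overflowing binary operator with the nsw flag set.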
static bool hasNoSignedWrap(BinaryOperator &I) {
  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  return OBO && OBO->hasNoSignedWrap();
}
/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. Fast-math flags are preserved when applicable, since they
/// remain valid across these transforms.
static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
  FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
  if (!FPMO) {
    I.clearSubclassOptionalData();
    return;
  }

  FastMathFlags FMF = I.getFastMathFlags();
  I.clearSubclassOptionalData();
  I.setFastMathFlags(FMF);
}
/// Combine constant operands of associative operations either before or after a
/// cast to eliminate one of the associative operations:
/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
                                   InstCombinerImpl &IC) {
  auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
  if (!Cast || !Cast->hasOneUse())
    return false;

  // TODO: Enhance logic for other casts and remove this check.
  auto CastOpcode = Cast->getOpcode();
  if (CastOpcode != Instruction::ZExt)
    return false;

  // TODO: Enhance logic for other BinOps and remove this check.
  if (!BinOp1->isBitwiseLogicOp())
    return false;

  auto AssocOpcode = BinOp1->getOpcode();
  auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
  if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
    return false;

  Constant *C1, *C2;
  if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
      !match(BinOp2->getOperand(1), m_Constant(C2)))
    return false;

  // TODO: This assumes a zext cast.
  // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
  // to the destination type might lose bits.

  // Fold the constants together in the destination type:
  // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
  Type *DestTy = C1->getType();
  Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
  Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
  IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
  IC.replaceOperand(*BinOp1, 1, FoldedC);
  return true;
}
// Simplifies IntToPtr/PtrToInt RoundTrip Cast To BitCast.
// inttoptr ( ptrtoint (x) ) --> x
Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
  auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
  if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
                      DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
    auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
    Type *CastTy = IntToPtr->getDestTy();
    if (PtrToInt &&
        CastTy->getPointerAddressSpace() ==
            PtrToInt->getSrcTy()->getPointerAddressSpace() &&
        DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
            DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
      return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
                                              "", PtrToInt);
    }
  }
  return nullptr;
}
/// This performs a few simplifications for operators that are associative or
/// commutative:
///
///  Commutative operators:
///
///  1. Order operands such that they are listed from right (least complex) to
///     left (most complex). This puts constants before unary operators before
///     binary operators.
///
///  Associative operators:
///
///  2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
///  3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
///
///  Associative and commutative operators:
///
///  4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
///  5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
///  6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
///     if C1 and C2 are constants.
bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
  Instruction::BinaryOps Opcode = I.getOpcode();
  bool Changed = false;

  do {
    // Order operands such that they are listed from right (least complex) to
    // left (most complex). This puts constants before unary operators before
    // binary operators.
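    // For example, "add i32 1, %x" is canonicalized to "add i32 %x, 1", with
    // the constant moved to the RHS (canonicalization #1 in the file header).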
    if (I.isCommutative() && getComplexity(I.getOperand(0)) <
        getComplexity(I.getOperand(1)))
      Changed = !I.swapOperands();

    BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
    BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));

    if (I.isAssociative()) {
      // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "B op C" simplify?
        if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "A op V".
          replaceOperand(I, 0, A);
          replaceOperand(I, 1, V);
          bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
          bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);

          // Conservatively clear all optional flags since they may not be
          // preserved by the reassociation. Reset nsw/nuw based on the above
          // analysis.
          ClearSubclassDataAfterReassociation(I);

          // Note: this is only valid because SimplifyBinOp doesn't look at
          // the operands to Op0.
          if (IsNUW)
            I.setHasNoUnsignedWrap(true);

          if (IsNSW)
            I.setHasNoSignedWrap(true);

          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "A op B" simplify?
        if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "V op C".
          replaceOperand(I, 0, V);
          replaceOperand(I, 1, C);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }
    }

    if (I.isAssociative() && I.isCommutative()) {
      if (simplifyAssocCastAssoc(&I, *this)) {
        Changed = true;
        ++NumReassoc;
        continue;
      }

      // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "V op B".
          replaceOperand(I, 0, V);
          replaceOperand(I, 1, B);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "B op V".
          replaceOperand(I, 0, B);
          replaceOperand(I, 1, V);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
      // if C1 and C2 are constants.
      Value *A, *B;
      Constant *C1, *C2, *CRes;
      if (Op0 && Op1 &&
          Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
          match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
          match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
          (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
        bool IsNUW = hasNoUnsignedWrap(I) &&
                     hasNoUnsignedWrap(*Op0) &&
                     hasNoUnsignedWrap(*Op1);

        BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
                                   BinaryOperator::CreateNUW(Opcode, A, B) :
                                   BinaryOperator::Create(Opcode, A, B);

        if (isa<FPMathOperator>(NewBO)) {
          FastMathFlags Flags = I.getFastMathFlags();
          Flags &= Op0->getFastMathFlags();
          Flags &= Op1->getFastMathFlags();
          NewBO->setFastMathFlags(Flags);
        }
        InsertNewInstWith(NewBO, I);
        NewBO->takeName(Op1);
        replaceOperand(I, 0, NewBO);
        replaceOperand(I, 1, CRes);
        // Conservatively clear the optional flags, since they may not be
        // preserved by the reassociation.
        ClearSubclassDataAfterReassociation(I);
        if (IsNUW)
          I.setHasNoUnsignedWrap(true);

        Changed = true;
        continue;
      }
    }

    // No further simplifications.
    return Changed;
  } while (true);
}
/// Return whether "X LOp (Y ROp Z)" is always equal to
/// "(X LOp Y) ROp (X LOp Z)".
static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
                                     Instruction::BinaryOps ROp) {
  // X & (Y | Z) <--> (X & Y) | (X & Z)
  // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
  if (LOp == Instruction::And)
    return ROp == Instruction::Or || ROp == Instruction::Xor;

  // X | (Y & Z) <--> (X | Y) & (X | Z)
  if (LOp == Instruction::Or)
    return ROp == Instruction::And;

  // X * (Y + Z) <--> (X * Y) + (X * Z)
  // X * (Y - Z) <--> (X * Y) - (X * Z)
  if (LOp == Instruction::Mul)
    return ROp == Instruction::Add || ROp == Instruction::Sub;

  return false;
}

/// Return whether "(X LOp Y) ROp Z" is always equal to
/// "(X ROp Z) LOp (Y ROp Z)".
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
                                     Instruction::BinaryOps ROp) {
  if (Instruction::isCommutative(ROp))
    return leftDistributesOverRight(ROp, LOp);

  // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
  return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);

  // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
  // but this requires knowing that the addition does not overflow and other
  // such subtleties.
}
/// This function returns the identity value for the given opcode, which can be
/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
  if (isa<Constant>(V))
    return nullptr;

  return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
}
/// This function predicates factorization using distributive laws. By default,
/// it just returns the 'Op' inputs. But for special-cases like
/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
/// allow more factorization opportunities.
static Instruction::BinaryOps
getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
                          Value *&LHS, Value *&RHS) {
  assert(Op && "Expected a binary operator");
  LHS = Op->getOperand(0);
  RHS = Op->getOperand(1);
  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
    Constant *C;
    if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
      // X << C --> X * (1 << C)
      RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
      return Instruction::Mul;
    }
    // TODO: We can add other conversions e.g. shr => div etc.
  }
  return Op->getOpcode();
}
/// This tries to simplify binary operations by factorizing out common terms
/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
                               InstCombiner::BuilderTy &Builder,
                               Instruction::BinaryOps InnerOpcode, Value *A,
                               Value *B, Value *C, Value *D) {
  assert(A && B && C && D && "All values must be provided");

  Value *V = nullptr;
  Value *RetVal = nullptr;
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  // Does "X op' Y" always equal "Y op' X"?
  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);

  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
    // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
    // commutative case, "(A op' B) op (C op' A)"?
    if (A == C || (InnerCommutative && A == D)) {
      if (A != C)
        std::swap(C, D);
      // Consider forming "A op' (B op D)".
      // If "B op D" simplifies then it can be formed with no cost.
      V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));

      // If "B op D" doesn't simplify then only go on if one of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
        V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
      if (V)
        RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
    }
  }

  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
  if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
    // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
    // commutative case, "(A op' B) op (B op' D)"?
    if (B == D || (InnerCommutative && B == C)) {
      if (B != D)
        std::swap(C, D);
      // Consider forming "(A op C) op' B".
      // If "A op C" simplifies then it can be formed with no cost.
      V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));

      // If "A op C" doesn't simplify then only go on if one of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
        V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
      if (V)
        RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
    }
  }

  if (!RetVal)
    return nullptr;

  ++NumFactor;
  RetVal->takeName(&I);

  // Try to add no-overflow flags to the final value.
  if (isa<OverflowingBinaryOperator>(RetVal)) {
    bool HasNSW = false;
    bool HasNUW = false;
    if (isa<OverflowingBinaryOperator>(&I)) {
      HasNSW = I.hasNoSignedWrap();
      HasNUW = I.hasNoUnsignedWrap();
    }
    if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
      HasNSW &= LOBO->hasNoSignedWrap();
      HasNUW &= LOBO->hasNoUnsignedWrap();
    }

    if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
      HasNSW &= ROBO->hasNoSignedWrap();
      HasNUW &= ROBO->hasNoUnsignedWrap();
    }

    if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
      // We can propagate 'nsw' if we know that
      //  %Y = mul nsw i16 %X, C
      //  %Z = add nsw i16 %Y, %X
      // =>
      //  %Z = mul nsw i16 %X, C+1
      //
      // iff C+1 isn't INT_MIN
      const APInt *CInt;
      if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
        cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);

      // nuw can be propagated with any constant or nuw value.
      cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
    }
  }
  return RetVal;
}
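/// Try to factorize a common term out of the two operands of a binary
/// operation using distributive laws, e.g. "(A*B)+(A*C)" --> "A*(B+C)".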
Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
  Value *A, *B, *C, *D;
  Instruction::BinaryOps LHSOpcode, RHSOpcode;

  if (Op0)
    LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
  if (Op1)
    RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);

  // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
  // a common term.
  if (Op0 && Op1 && LHSOpcode == RHSOpcode)
    if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
      return V;

  // The instruction has the form "(A op' B) op (C)". Try to factorize common
  // term.
  if (Op0)
    if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
      if (Value *V =
              tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
        return V;

  // The instruction has the form "(B) op (C op' D)". Try to factorize common
  // term.
  if (Op1)
    if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
      if (Value *V =
              tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
        return V;

  return nullptr;
}
/// This tries to simplify binary operations which some other binary operation
/// distributes over either by factorizing out common terms
/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
/// simplifications (e.g. "A & (B | C) -> (A&B) | (A&C)" if this is a win).
/// Returns the simplified value, or null if it didn't simplify.
Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  // Factorization.
  if (Value *R = tryFactorizationFolds(I))
    return R;

  // Expansion.
  if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
    // The instruction has the form "(A op' B) op C". See if expanding it out
    // to "(A op C) op' (B op C)" results in simplifications.
    Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
    Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'

    // Disable the use of undef because it's not safe to distribute undef.
    auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
    Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
    Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);

    // Do "A op C" and "B op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      C = Builder.CreateBinOp(InnerOpcode, L, R);
      C->takeName(&I);
      return C;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
      // They do! Return "B op C".
      ++NumExpand;
      C = Builder.CreateBinOp(TopLevelOpcode, B, C);
      C->takeName(&I);
      return C;
    }

    // Does "B op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
      // They do! Return "A op C".
      ++NumExpand;
      C = Builder.CreateBinOp(TopLevelOpcode, A, C);
      C->takeName(&I);
      return C;
    }
  }

  if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
    // The instruction has the form "A op (B op' C)". See if expanding it out
    // to "(A op B) op' (A op C)" results in simplifications.
    Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
    Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'

    // Disable the use of undef because it's not safe to distribute undef.
    auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
    Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
    Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);

    // Do "A op B" and "A op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      A = Builder.CreateBinOp(InnerOpcode, L, R);
      A->takeName(&I);
      return A;
    }

    // Does "A op B" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
      // They do! Return "A op C".
      ++NumExpand;
      A = Builder.CreateBinOp(TopLevelOpcode, A, C);
      A->takeName(&I);
      return A;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
      // They do! Return "A op B".
      ++NumExpand;
      A = Builder.CreateBinOp(TopLevelOpcode, A, B);
      A->takeName(&I);
      return A;
    }
  }

  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
}
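/// If one or both operands of the binary operation are selects, try to fold
/// the operation into the select arms, e.g.
/// (A ? B : C) op (A ? E : F) --> A ? (B op E) : (C op F).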
Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
                                                        Value *LHS,
                                                        Value *RHS) {
  Value *A, *B, *C, *D, *E, *F;
  bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
  bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
  if (!LHSIsSelect && !RHSIsSelect)
    return nullptr;

  FastMathFlags FMF;
  BuilderTy::FastMathFlagGuard Guard(Builder);
  if (isa<FPMathOperator>(&I)) {
    FMF = I.getFastMathFlags();
    Builder.setFastMathFlags(FMF);
  }

  Instruction::BinaryOps Opcode = I.getOpcode();
  SimplifyQuery Q = SQ.getWithInstruction(&I);

  Value *Cond, *True = nullptr, *False = nullptr;

  // Special-case for add/negate combination. Replace the zero in the negation
  // with the trailing add operand:
  // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
  // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
  auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
    // We need an 'add' and exactly 1 arm of the select to have been simplified.
    if (Opcode != Instruction::Add || (!True && !False) || (True && False))
      return nullptr;

    Value *N;
    if (True && match(FVal, m_Neg(m_Value(N)))) {
      Value *Sub = Builder.CreateSub(Z, N);
      return Builder.CreateSelect(Cond, True, Sub, I.getName());
    }
    if (False && match(TVal, m_Neg(m_Value(N)))) {
      Value *Sub = Builder.CreateSub(Z, N);
      return Builder.CreateSelect(Cond, Sub, False, I.getName());
    }
    return nullptr;
  };

  if (LHSIsSelect && RHSIsSelect && A == D) {
    // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
    Cond = A;
    True = simplifyBinOp(Opcode, B, E, FMF, Q);
    False = simplifyBinOp(Opcode, C, F, FMF, Q);

    if (LHS->hasOneUse() && RHS->hasOneUse()) {
      if (False && !True)
        True = Builder.CreateBinOp(Opcode, B, E);
      else if (True && !False)
        False = Builder.CreateBinOp(Opcode, C, F);
    }
  } else if (LHSIsSelect && LHS->hasOneUse()) {
    // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
    Cond = A;
    True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
    False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
    if (Value *NewSel = foldAddNegate(B, C, RHS))
      return NewSel;
  } else if (RHSIsSelect && RHS->hasOneUse()) {
    // X op (D ? E : F) -> D ? (X op E) : (X op F)
    Cond = D;
    True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
    False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
    if (Value *NewSel = foldAddNegate(E, F, LHS))
      return NewSel;
  }

  if (!True || !False)
    return nullptr;

  Value *SI = Builder.CreateSelect(Cond, True, False);
  SI->takeName(&I);
  return SI;
}
/// Freely adapt every user of V as if V were changed to !V.
/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
  for (User *U : make_early_inc_range(I->users())) {
    if (U == IgnoredUser)
      continue; // Don't consider this user.
    switch (cast<Instruction>(U)->getOpcode()) {
    case Instruction::Select: {
      auto *SI = cast<SelectInst>(U);
      SI->swapValues();
      SI->swapProfMetadata();
      break;
    }
    case Instruction::Br:
      cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
      break;
    case Instruction::Xor:
      replaceInstUsesWith(cast<Instruction>(*U), I);
      break;
    default:
      llvm_unreachable("Got unexpected user - out of sync with "
                       "canFreelyInvertAllUsersOf() ?");
    }
  }
}
  859. /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
  860. /// constant zero (which is the 'negate' form).
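/// Illustrative examples (not from the original comments): for 'sub i32 0, %x'
/// this returns %x, and for a foldable integer constant such as i32 5 it
/// returns the negated constant -5.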
  861. Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
  862. Value *NegV;
  863. if (match(V, m_Neg(m_Value(NegV))))
  864. return NegV;
  865. // Constants can be considered to be negated values if they can be folded.
  866. if (ConstantInt *C = dyn_cast<ConstantInt>(V))
  867. return ConstantExpr::getNeg(C);
  868. if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
  869. if (C->getType()->getElementType()->isIntegerTy())
  870. return ConstantExpr::getNeg(C);
  871. if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
  872. for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
  873. Constant *Elt = CV->getAggregateElement(i);
  874. if (!Elt)
  875. return nullptr;
  876. if (isa<UndefValue>(Elt))
  877. continue;
  878. if (!isa<ConstantInt>(Elt))
  879. return nullptr;
  880. }
  881. return ConstantExpr::getNeg(CV);
  882. }
  883. // Negate integer vector splats.
  884. if (auto *CV = dyn_cast<Constant>(V))
  885. if (CV->getType()->isVectorTy() &&
  886. CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
  887. return ConstantExpr::getNeg(CV);
  888. return nullptr;
  889. }
  890. /// A binop with a constant operand and a sign-extended boolean operand may be
  891. /// converted into a select of constants by applying the binary operation to
  892. /// the constant with the two possible values of the extended boolean (0 or -1).
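/// Illustrative example (not from the original comments):
///   add (sext i1 %b to i32), 42 --> select i1 %b, i32 41, i32 42
/// because the sext is -1 when %b is true and 0 when %b is false.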
  893. Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
  894. // TODO: Handle non-commutative binop (constant is operand 0).
  895. // TODO: Handle zext.
  896. // TODO: Peek through 'not' of cast.
  897. Value *BO0 = BO.getOperand(0);
  898. Value *BO1 = BO.getOperand(1);
  899. Value *X;
  900. Constant *C;
  901. if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
  902. !X->getType()->isIntOrIntVectorTy(1))
  903. return nullptr;
  904. // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
  905. Constant *Ones = ConstantInt::getAllOnesValue(BO.getType());
  906. Constant *Zero = ConstantInt::getNullValue(BO.getType());
  907. Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
  908. Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
  909. return SelectInst::Create(X, TVal, FVal);
  910. }
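/// Try to constant fold instruction I with the select arm SO substituted for
/// the select itself. All other operands of I must already be constants;
/// returns null if SO is not a constant or the fold does not succeed.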
  911. static Constant *constantFoldOperationIntoSelectOperand(
  912. Instruction &I, SelectInst *SI, Value *SO) {
  913. auto *ConstSO = dyn_cast<Constant>(SO);
  914. if (!ConstSO)
  915. return nullptr;
  916. SmallVector<Constant *> ConstOps;
  917. for (Value *Op : I.operands()) {
  918. if (Op == SI)
  919. ConstOps.push_back(ConstSO);
  920. else if (auto *C = dyn_cast<Constant>(Op))
  921. ConstOps.push_back(C);
  922. else
  923. llvm_unreachable("Operands should be select or constant");
  924. }
  925. return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
  926. }
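/// Rebuild instruction I with the select operand replaced by the arm SO; used
/// for the arm that did not constant fold. Handles casts, constant-foldable
/// intrinsics, extractelement, and binops with a constant operand.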
  927. static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
  928. InstCombiner::BuilderTy &Builder) {
  929. if (auto *Cast = dyn_cast<CastInst>(&I))
  930. return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());
  931. if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
  932. assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) &&
  933. "Expected constant-foldable intrinsic");
  934. Intrinsic::ID IID = II->getIntrinsicID();
  935. if (II->arg_size() == 1)
  936. return Builder.CreateUnaryIntrinsic(IID, SO);
  937. // This works for real binary ops like min/max (where we always expect the
  938. // constant operand to be canonicalized as op1) and unary ops with a bonus
  939. // constant argument like ctlz/cttz.
  940. // TODO: Handle non-commutative binary intrinsics as below for binops.
  941. assert(II->arg_size() == 2 && "Expected binary intrinsic");
  942. assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand");
  943. return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
  944. }
  945. if (auto *EI = dyn_cast<ExtractElementInst>(&I))
  946. return Builder.CreateExtractElement(SO, EI->getIndexOperand());
  947. assert(I.isBinaryOp() && "Unexpected opcode for select folding");
  948. // Figure out if the constant is the left or the right argument.
  949. bool ConstIsRHS = isa<Constant>(I.getOperand(1));
  950. Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
  951. Value *Op0 = SO, *Op1 = ConstOperand;
  952. if (!ConstIsRHS)
  953. std::swap(Op0, Op1);
  954. Value *NewBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), Op0,
  955. Op1, SO->getName() + ".op");
  956. if (auto *NewBOI = dyn_cast<Instruction>(NewBO))
  957. NewBOI->copyIRFlags(&I);
  958. return NewBO;
  959. }
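/// Fold Op into the arms of the select SI:
///   op (select C, TV, FV) --> select C, (op TV), (op FV)
/// At least one arm of the select must be a constant, and at least one of the
/// new arms must constant fold.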
  960. Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
  961. bool FoldWithMultiUse) {
962. // Don't modify shared select instructions unless FoldWithMultiUse is set.
  963. if (!SI->hasOneUse() && !FoldWithMultiUse)
  964. return nullptr;
  965. Value *TV = SI->getTrueValue();
  966. Value *FV = SI->getFalseValue();
  967. if (!(isa<Constant>(TV) || isa<Constant>(FV)))
  968. return nullptr;
  969. // Bool selects with constant operands can be folded to logical ops.
  970. if (SI->getType()->isIntOrIntVectorTy(1))
  971. return nullptr;
  972. // If it's a bitcast involving vectors, make sure it has the same number of
  973. // elements on both sides.
  974. if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
  975. VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
  976. VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
  977. // Verify that either both or neither are vectors.
  978. if ((SrcTy == nullptr) != (DestTy == nullptr))
  979. return nullptr;
  980. // If vectors, verify that they have the same number of elements.
  981. if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
  982. return nullptr;
  983. }
  984. // Test if a CmpInst instruction is used exclusively by a select as
  985. // part of a minimum or maximum operation. If so, refrain from doing
  986. // any other folding. This helps out other analyses which understand
  987. // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
  988. // and CodeGen. And in this case, at least one of the comparison
  989. // operands has at least one user besides the compare (the select),
  990. // which would often largely negate the benefit of folding anyway.
  991. if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
  992. if (CI->hasOneUse()) {
  993. Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
  994. // FIXME: This is a hack to avoid infinite looping with min/max patterns.
  995. // We have to ensure that vector constants that only differ with
  996. // undef elements are treated as equivalent.
  997. auto areLooselyEqual = [](Value *A, Value *B) {
  998. if (A == B)
  999. return true;
  1000. // Test for vector constants.
  1001. Constant *ConstA, *ConstB;
  1002. if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
  1003. return false;
  1004. // TODO: Deal with FP constants?
  1005. if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
  1006. return false;
  1007. // Compare for equality including undefs as equal.
  1008. auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
  1009. const APInt *C;
  1010. return match(Cmp, m_APIntAllowUndef(C)) && C->isOne();
  1011. };
  1012. if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
  1013. (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
  1014. return nullptr;
  1015. }
  1016. }
  1017. // Make sure that one of the select arms constant folds successfully.
  1018. Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, TV);
  1019. Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, FV);
  1020. if (!NewTV && !NewFV)
  1021. return nullptr;
  1022. // Create an instruction for the arm that did not fold.
  1023. if (!NewTV)
  1024. NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
  1025. if (!NewFV)
  1026. NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
  1027. return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
  1028. }
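/// Fold instruction I, which has PN as an operand, into the phi by applying I
/// to each incoming value. At most one incoming value may fail to simplify; in
/// that case a clone of I is inserted at the end of the corresponding
/// predecessor (which must branch unconditionally here). The per-edge results
/// form a new phi that replaces I.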
  1029. Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
  1030. unsigned NumPHIValues = PN->getNumIncomingValues();
  1031. if (NumPHIValues == 0)
  1032. return nullptr;
  1033. // We normally only transform phis with a single use. However, if a PHI has
  1034. // multiple uses and they are all the same operation, we can fold *all* of the
  1035. // uses into the PHI.
  1036. if (!PN->hasOneUse()) {
  1037. // Walk the use list for the instruction, comparing them to I.
  1038. for (User *U : PN->users()) {
  1039. Instruction *UI = cast<Instruction>(U);
  1040. if (UI != &I && !I.isIdenticalTo(UI))
  1041. return nullptr;
  1042. }
  1043. // Otherwise, we can replace *all* users with the new PHI we form.
  1044. }
  1045. // Check to see whether the instruction can be folded into each phi operand.
  1046. // If there is one operand that does not fold, remember the BB it is in.
  1047. // If there is more than one or if *it* is a PHI, bail out.
  1048. SmallVector<Value *> NewPhiValues;
  1049. BasicBlock *NonSimplifiedBB = nullptr;
  1050. Value *NonSimplifiedInVal = nullptr;
  1051. for (unsigned i = 0; i != NumPHIValues; ++i) {
  1052. Value *InVal = PN->getIncomingValue(i);
  1053. BasicBlock *InBB = PN->getIncomingBlock(i);
  1054. // NB: It is a precondition of this transform that the operands be
  1055. // phi translatable! This is usually trivially satisfied by limiting it
  1056. // to constant ops, and for selects we do a more sophisticated check.
  1057. SmallVector<Value *> Ops;
  1058. for (Value *Op : I.operands()) {
  1059. if (Op == PN)
  1060. Ops.push_back(InVal);
  1061. else
  1062. Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
  1063. }
  1064. // Don't consider the simplification successful if we get back a constant
  1065. // expression. That's just an instruction in hiding.
  1066. // Also reject the case where we simplify back to the phi node. We wouldn't
  1067. // be able to remove it in that case.
  1068. Value *NewVal = simplifyInstructionWithOperands(
  1069. &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
  1070. if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr())) {
  1071. NewPhiValues.push_back(NewVal);
  1072. continue;
  1073. }
  1074. if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
  1075. if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
  1076. NonSimplifiedBB = InBB;
  1077. NonSimplifiedInVal = InVal;
  1078. NewPhiValues.push_back(nullptr);
  1079. // If the InVal is an invoke at the end of the pred block, then we can't
  1080. // insert a computation after it without breaking the edge.
  1081. if (isa<InvokeInst>(InVal))
  1082. if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
  1083. return nullptr;
  1084. // If the incoming non-constant value is reachable from the phis block,
  1085. // we'll push the operation across a loop backedge. This could result in
  1086. // an infinite combine loop, and is generally non-profitable (especially
  1087. // if the operation was originally outside the loop).
  1088. if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT,
  1089. LI))
  1090. return nullptr;
  1091. }
  1092. // If there is exactly one non-simplified value, we can insert a copy of the
  1093. // operation in that block. However, if this is a critical edge, we would be
  1094. // inserting the computation on some other paths (e.g. inside a loop). Only
  1095. // do this if the pred block is unconditionally branching into the phi block.
  1096. // Also, make sure that the pred block is not dead code.
  1097. if (NonSimplifiedBB != nullptr) {
  1098. BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
  1099. if (!BI || !BI->isUnconditional() ||
  1100. !DT.isReachableFromEntry(NonSimplifiedBB))
  1101. return nullptr;
  1102. }
  1103. // Okay, we can do the transformation: create the new PHI node.
  1104. PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
  1105. InsertNewInstBefore(NewPN, *PN);
  1106. NewPN->takeName(PN);
  1107. // If we are going to have to insert a new computation, do so right before the
  1108. // predecessor's terminator.
  1109. Instruction *Clone = nullptr;
  1110. if (NonSimplifiedBB) {
  1111. Clone = I.clone();
  1112. for (Use &U : Clone->operands()) {
  1113. if (U == PN)
  1114. U = NonSimplifiedInVal;
  1115. else
  1116. U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
  1117. }
  1118. InsertNewInstBefore(Clone, *NonSimplifiedBB->getTerminator());
  1119. }
  1120. for (unsigned i = 0; i != NumPHIValues; ++i) {
  1121. if (NewPhiValues[i])
  1122. NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
  1123. else
  1124. NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
  1125. }
  1126. for (User *U : make_early_inc_range(PN->users())) {
  1127. Instruction *User = cast<Instruction>(U);
  1128. if (User == &I) continue;
  1129. replaceInstUsesWith(*User, NewPN);
  1130. eraseInstFromFunction(*User);
  1131. }
  1132. return replaceInstUsesWith(I, NewPN);
  1133. }
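/// Fold a binop whose operands are two single-use phis (each with exactly two
/// incoming values) that both receive a constant from the same predecessor:
/// the constants are folded, and the binop of the other incoming values is
/// hoisted into the other predecessor.
/// Illustrative example (block and value names are hypothetical):
///   %p0 = phi i32 [ 7, %const.bb ], [ %a, %other.bb ]
///   %p1 = phi i32 [ 5, %const.bb ], [ %b, %other.bb ]
///   %r  = add i32 %p0, %p1
/// -->
///   ; %ab = add i32 %a, %b, created at the end of %other.bb
///   %r  = phi i32 [ 12, %const.bb ], [ %ab, %other.bb ]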
  1134. Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
  1135. // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
  1136. // we are guarding against replicating the binop in >1 predecessor.
  1137. // This could miss matching a phi with 2 constant incoming values.
  1138. auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
  1139. auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
  1140. if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
  1141. Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
  1142. return nullptr;
  1143. // TODO: Remove the restriction for binop being in the same block as the phis.
  1144. if (BO.getParent() != Phi0->getParent() ||
  1145. BO.getParent() != Phi1->getParent())
  1146. return nullptr;
  1147. // Match a pair of incoming constants for one of the predecessor blocks.
  1148. BasicBlock *ConstBB, *OtherBB;
  1149. Constant *C0, *C1;
  1150. if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
  1151. ConstBB = Phi0->getIncomingBlock(0);
  1152. OtherBB = Phi0->getIncomingBlock(1);
  1153. } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
  1154. ConstBB = Phi0->getIncomingBlock(1);
  1155. OtherBB = Phi0->getIncomingBlock(0);
  1156. } else {
  1157. return nullptr;
  1158. }
  1159. if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
  1160. return nullptr;
  1161. // The block that we are hoisting to must reach here unconditionally.
  1162. // Otherwise, we could be speculatively executing an expensive or
1163. // non-speculatable op.
  1164. auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
  1165. if (!PredBlockBranch || PredBlockBranch->isConditional() ||
  1166. !DT.isReachableFromEntry(OtherBB))
  1167. return nullptr;
  1168. // TODO: This check could be tightened to only apply to binops (div/rem) that
  1169. // are not safe to speculatively execute. But that could allow hoisting
  1170. // potentially expensive instructions (fdiv for example).
  1171. for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
  1172. if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
  1173. return nullptr;
  1174. // Fold constants for the predecessor block with constant incoming values.
  1175. Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
  1176. if (!NewC)
  1177. return nullptr;
  1178. // Make a new binop in the predecessor block with the non-constant incoming
  1179. // values.
  1180. Builder.SetInsertPoint(PredBlockBranch);
  1181. Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
  1182. Phi0->getIncomingValueForBlock(OtherBB),
  1183. Phi1->getIncomingValueForBlock(OtherBB));
  1184. if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
  1185. NotFoldedNewBO->copyIRFlags(&BO);
  1186. // Replace the binop with a phi of the new values. The old phis are dead.
  1187. PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
  1188. NewPhi->addIncoming(NewBO, OtherBB);
  1189. NewPhi->addIncoming(NewC, ConstBB);
  1190. return NewPhi;
  1191. }
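/// If operand 1 of the binop is a constant and operand 0 is a select or a phi,
/// dispatch to the select/phi folds above.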
  1192. Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
  1193. if (!isa<Constant>(I.getOperand(1)))
  1194. return nullptr;
  1195. if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
  1196. if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
  1197. return NewSel;
  1198. } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
  1199. if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
  1200. return NewPhi;
  1201. }
  1202. return nullptr;
  1203. }
  1204. /// Given a pointer type and a constant offset, determine whether or not there
  1205. /// is a sequence of GEP indices into the pointed type that will land us at the
  1206. /// specified offset. If so, fill them into NewIndices and return the resultant
  1207. /// element type, otherwise return null.
  1208. static Type *findElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
  1209. SmallVectorImpl<Value *> &NewIndices,
  1210. const DataLayout &DL) {
  1211. // Only used by visitGEPOfBitcast(), which is skipped for opaque pointers.
  1212. Type *Ty = PtrTy->getNonOpaquePointerElementType();
  1213. if (!Ty->isSized())
  1214. return nullptr;
  1215. APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset);
  1216. SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
  1217. if (!Offset.isZero())
  1218. return nullptr;
  1219. for (const APInt &Index : Indices)
  1220. NewIndices.push_back(ConstantInt::get(PtrTy->getContext(), Index));
  1221. return Ty;
  1222. }
  1223. static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
  1224. // If this GEP has only 0 indices, it is the same pointer as
  1225. // Src. If Src is not a trivial GEP too, don't combine
  1226. // the indices.
  1227. if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
  1228. !Src.hasOneUse())
  1229. return false;
  1230. return true;
  1231. }
  1232. /// Return a value X such that Val = X * Scale, or null if none.
  1233. /// If the multiplication is known not to overflow, then NoSignedWrap is set.
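/// Illustrative example (not from the original comments): descaling a
/// single-use '%v = mul nsw i32 %x, 8' by Scale = 4 rewrites it to
/// 'mul nsw i32 %x, 2' and sets NoSignedWrap, since 8 is divisible by 4 and
/// the original multiply was nsw.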
  1234. Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
  1235. assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
  1236. assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
  1237. Scale.getBitWidth() && "Scale not compatible with value!");
  1238. // If Val is zero or Scale is one then Val = Val * Scale.
  1239. if (match(Val, m_Zero()) || Scale == 1) {
  1240. NoSignedWrap = true;
  1241. return Val;
  1242. }
  1243. // If Scale is zero then it does not divide Val.
  1244. if (Scale.isMinValue())
  1245. return nullptr;
  1246. // Look through chains of multiplications, searching for a constant that is
  1247. // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
  1248. // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
  1249. // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
  1250. // down from Val:
  1251. //
  1252. // Val = M1 * X || Analysis starts here and works down
  1253. // M1 = M2 * Y || Doesn't descend into terms with more
  1254. // M2 = Z * 4 \/ than one use
  1255. //
  1256. // Then to modify a term at the bottom:
  1257. //
  1258. // Val = M1 * X
  1259. // M1 = Z * Y || Replaced M2 with Z
  1260. //
  1261. // Then to work back up correcting nsw flags.
  1262. // Op - the term we are currently analyzing. Starts at Val then drills down.
  1263. // Replaced with its descaled value before exiting from the drill down loop.
  1264. Value *Op = Val;
  1265. // Parent - initially null, but after drilling down notes where Op came from.
  1266. // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
  1267. // 0'th operand of Val.
  1268. std::pair<Instruction *, unsigned> Parent;
  1269. // Set if the transform requires a descaling at deeper levels that doesn't
  1270. // overflow.
  1271. bool RequireNoSignedWrap = false;
  1272. // Log base 2 of the scale. Negative if not a power of 2.
  1273. int32_t logScale = Scale.exactLogBase2();
  1274. for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
  1275. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
  1276. // If Op is a constant divisible by Scale then descale to the quotient.
  1277. APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
  1278. APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
  1279. if (!Remainder.isMinValue())
  1280. // Not divisible by Scale.
  1281. return nullptr;
  1282. // Replace with the quotient in the parent.
  1283. Op = ConstantInt::get(CI->getType(), Quotient);
  1284. NoSignedWrap = true;
  1285. break;
  1286. }
  1287. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
  1288. if (BO->getOpcode() == Instruction::Mul) {
  1289. // Multiplication.
  1290. NoSignedWrap = BO->hasNoSignedWrap();
  1291. if (RequireNoSignedWrap && !NoSignedWrap)
  1292. return nullptr;
  1293. // There are three cases for multiplication: multiplication by exactly
  1294. // the scale, multiplication by a constant different to the scale, and
  1295. // multiplication by something else.
  1296. Value *LHS = BO->getOperand(0);
  1297. Value *RHS = BO->getOperand(1);
  1298. if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
  1299. // Multiplication by a constant.
  1300. if (CI->getValue() == Scale) {
  1301. // Multiplication by exactly the scale, replace the multiplication
  1302. // by its left-hand side in the parent.
  1303. Op = LHS;
  1304. break;
  1305. }
  1306. // Otherwise drill down into the constant.
  1307. if (!Op->hasOneUse())
  1308. return nullptr;
  1309. Parent = std::make_pair(BO, 1);
  1310. continue;
  1311. }
  1312. // Multiplication by something else. Drill down into the left-hand side
  1313. // since that's where the reassociate pass puts the good stuff.
  1314. if (!Op->hasOneUse())
  1315. return nullptr;
  1316. Parent = std::make_pair(BO, 0);
  1317. continue;
  1318. }
  1319. if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
  1320. isa<ConstantInt>(BO->getOperand(1))) {
  1321. // Multiplication by a power of 2.
  1322. NoSignedWrap = BO->hasNoSignedWrap();
  1323. if (RequireNoSignedWrap && !NoSignedWrap)
  1324. return nullptr;
  1325. Value *LHS = BO->getOperand(0);
  1326. int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
  1327. getLimitedValue(Scale.getBitWidth());
  1328. // Op = LHS << Amt.
  1329. if (Amt == logScale) {
  1330. // Multiplication by exactly the scale, replace the multiplication
  1331. // by its left-hand side in the parent.
  1332. Op = LHS;
  1333. break;
  1334. }
  1335. if (Amt < logScale || !Op->hasOneUse())
  1336. return nullptr;
  1337. // Multiplication by more than the scale. Reduce the multiplying amount
  1338. // by the scale in the parent.
  1339. Parent = std::make_pair(BO, 1);
  1340. Op = ConstantInt::get(BO->getType(), Amt - logScale);
  1341. break;
  1342. }
  1343. }
  1344. if (!Op->hasOneUse())
  1345. return nullptr;
  1346. if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
  1347. if (Cast->getOpcode() == Instruction::SExt) {
  1348. // Op is sign-extended from a smaller type, descale in the smaller type.
  1349. unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
  1350. APInt SmallScale = Scale.trunc(SmallSize);
  1351. // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
  1352. // descale Op as (sext Y) * Scale. In order to have
  1353. // sext (Y * SmallScale) = (sext Y) * Scale
  1354. // some conditions need to hold however: SmallScale must sign-extend to
  1355. // Scale and the multiplication Y * SmallScale should not overflow.
  1356. if (SmallScale.sext(Scale.getBitWidth()) != Scale)
  1357. // SmallScale does not sign-extend to Scale.
  1358. return nullptr;
  1359. assert(SmallScale.exactLogBase2() == logScale);
  1360. // Require that Y * SmallScale must not overflow.
  1361. RequireNoSignedWrap = true;
  1362. // Drill down through the cast.
  1363. Parent = std::make_pair(Cast, 0);
  1364. Scale = SmallScale;
  1365. continue;
  1366. }
  1367. if (Cast->getOpcode() == Instruction::Trunc) {
  1368. // Op is truncated from a larger type, descale in the larger type.
  1369. // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
  1370. // trunc (Y * sext Scale) = (trunc Y) * Scale
  1371. // always holds. However (trunc Y) * Scale may overflow even if
  1372. // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
  1373. // from this point up in the expression (see later).
  1374. if (RequireNoSignedWrap)
  1375. return nullptr;
  1376. // Drill down through the cast.
  1377. unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
  1378. Parent = std::make_pair(Cast, 0);
  1379. Scale = Scale.sext(LargeSize);
  1380. if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
  1381. logScale = -1;
  1382. assert(Scale.exactLogBase2() == logScale);
  1383. continue;
  1384. }
  1385. }
  1386. // Unsupported expression, bail out.
  1387. return nullptr;
  1388. }
  1389. // If Op is zero then Val = Op * Scale.
  1390. if (match(Op, m_Zero())) {
  1391. NoSignedWrap = true;
  1392. return Op;
  1393. }
  1394. // We know that we can successfully descale, so from here on we can safely
  1395. // modify the IR. Op holds the descaled version of the deepest term in the
  1396. // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
  1397. // not to overflow.
  1398. if (!Parent.first)
  1399. // The expression only had one term.
  1400. return Op;
  1401. // Rewrite the parent using the descaled version of its operand.
  1402. assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
  1403. assert(Op != Parent.first->getOperand(Parent.second) &&
  1404. "Descaling was a no-op?");
  1405. replaceOperand(*Parent.first, Parent.second, Op);
  1406. Worklist.push(Parent.first);
  1407. // Now work back up the expression correcting nsw flags. The logic is based
  1408. // on the following observation: if X * Y is known not to overflow as a signed
  1409. // multiplication, and Y is replaced by a value Z with smaller absolute value,
  1410. // then X * Z will not overflow as a signed multiplication either. As we work
  1411. // our way up, having NoSignedWrap 'true' means that the descaled value at the
  1412. // current level has strictly smaller absolute value than the original.
  1413. Instruction *Ancestor = Parent.first;
  1414. do {
  1415. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
  1416. // If the multiplication wasn't nsw then we can't say anything about the
  1417. // value of the descaled multiplication, and we have to clear nsw flags
  1418. // from this point on up.
  1419. bool OpNoSignedWrap = BO->hasNoSignedWrap();
  1420. NoSignedWrap &= OpNoSignedWrap;
  1421. if (NoSignedWrap != OpNoSignedWrap) {
  1422. BO->setHasNoSignedWrap(NoSignedWrap);
  1423. Worklist.push(Ancestor);
  1424. }
  1425. } else if (Ancestor->getOpcode() == Instruction::Trunc) {
  1426. // The fact that the descaled input to the trunc has smaller absolute
  1427. // value than the original input doesn't tell us anything useful about
  1428. // the absolute values of the truncations.
  1429. NoSignedWrap = false;
  1430. }
  1431. assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
  1432. "Failed to keep proper track of nsw flags while drilling down?");
  1433. if (Ancestor == Val)
  1434. // Got to the top, all done!
  1435. return Val;
  1436. // Move up one level in the expression.
  1437. assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
  1438. Ancestor = Ancestor->user_back();
  1439. } while (true);
  1440. }
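/// Vector-specific shuffle folds for binops: narrow a binop of two
/// concatenations, sink reverse and same-mask shuffles (including
/// shuffle-with-constant) after the binop, remove a pair of commuted
/// select-shuffles feeding a commutative binop, and reassociate splat
/// shuffles, as implemented below.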
  1441. Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
  1442. if (!isa<VectorType>(Inst.getType()))
  1443. return nullptr;
  1444. BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
  1445. Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
  1446. assert(cast<VectorType>(LHS->getType())->getElementCount() ==
  1447. cast<VectorType>(Inst.getType())->getElementCount());
  1448. assert(cast<VectorType>(RHS->getType())->getElementCount() ==
  1449. cast<VectorType>(Inst.getType())->getElementCount());
  1450. // If both operands of the binop are vector concatenations, then perform the
  1451. // narrow binop on each pair of the source operands followed by concatenation
  1452. // of the results.
  1453. Value *L0, *L1, *R0, *R1;
  1454. ArrayRef<int> Mask;
  1455. if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
  1456. match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
  1457. LHS->hasOneUse() && RHS->hasOneUse() &&
  1458. cast<ShuffleVectorInst>(LHS)->isConcat() &&
  1459. cast<ShuffleVectorInst>(RHS)->isConcat()) {
  1460. // This transform does not have the speculative execution constraint as
  1461. // below because the shuffle is a concatenation. The new binops are
  1462. // operating on exactly the same elements as the existing binop.
  1463. // TODO: We could ease the mask requirement to allow different undef lanes,
  1464. // but that requires an analysis of the binop-with-undef output value.
  1465. Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
  1466. if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
  1467. BO->copyIRFlags(&Inst);
  1468. Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
  1469. if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
  1470. BO->copyIRFlags(&Inst);
  1471. return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
  1472. }
  1473. auto createBinOpReverse = [&](Value *X, Value *Y) {
  1474. Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
  1475. if (auto *BO = dyn_cast<BinaryOperator>(V))
  1476. BO->copyIRFlags(&Inst);
  1477. Module *M = Inst.getModule();
  1478. Function *F = Intrinsic::getDeclaration(
  1479. M, Intrinsic::experimental_vector_reverse, V->getType());
  1480. return CallInst::Create(F, V);
  1481. };
  1482. // NOTE: Reverse shuffles don't require the speculative execution protection
  1483. // below because they don't affect which lanes take part in the computation.
  1484. Value *V1, *V2;
  1485. if (match(LHS, m_VecReverse(m_Value(V1)))) {
  1486. // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
  1487. if (match(RHS, m_VecReverse(m_Value(V2))) &&
  1488. (LHS->hasOneUse() || RHS->hasOneUse() ||
  1489. (LHS == RHS && LHS->hasNUses(2))))
  1490. return createBinOpReverse(V1, V2);
1491. // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
  1492. if (LHS->hasOneUse() && isSplatValue(RHS))
  1493. return createBinOpReverse(V1, RHS);
  1494. }
  1495. // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
  1496. else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
  1497. return createBinOpReverse(LHS, V2);
  1498. // It may not be safe to reorder shuffles and things like div, urem, etc.
  1499. // because we may trap when executing those ops on unknown vector elements.
  1500. // See PR20059.
  1501. if (!isSafeToSpeculativelyExecute(&Inst))
  1502. return nullptr;
  1503. auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
  1504. Value *XY = Builder.CreateBinOp(Opcode, X, Y);
  1505. if (auto *BO = dyn_cast<BinaryOperator>(XY))
  1506. BO->copyIRFlags(&Inst);
  1507. return new ShuffleVectorInst(XY, M);
  1508. };
  1509. // If both arguments of the binary operation are shuffles that use the same
  1510. // mask and shuffle within a single vector, move the shuffle after the binop.
  1511. if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
  1512. match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
  1513. V1->getType() == V2->getType() &&
  1514. (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
  1515. // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
  1516. return createBinOpShuffle(V1, V2, Mask);
  1517. }
  1518. // If both arguments of a commutative binop are select-shuffles that use the
  1519. // same mask with commuted operands, the shuffles are unnecessary.
  1520. if (Inst.isCommutative() &&
  1521. match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
  1522. match(RHS,
  1523. m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
  1524. auto *LShuf = cast<ShuffleVectorInst>(LHS);
  1525. auto *RShuf = cast<ShuffleVectorInst>(RHS);
  1526. // TODO: Allow shuffles that contain undefs in the mask?
  1527. // That is legal, but it reduces undef knowledge.
  1528. // TODO: Allow arbitrary shuffles by shuffling after binop?
  1529. // That might be legal, but we have to deal with poison.
  1530. if (LShuf->isSelect() &&
  1531. !is_contained(LShuf->getShuffleMask(), UndefMaskElem) &&
  1532. RShuf->isSelect() &&
  1533. !is_contained(RShuf->getShuffleMask(), UndefMaskElem)) {
  1534. // Example:
  1535. // LHS = shuffle V1, V2, <0, 5, 6, 3>
  1536. // RHS = shuffle V2, V1, <0, 5, 6, 3>
  1537. // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
  1538. Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
  1539. NewBO->copyIRFlags(&Inst);
  1540. return NewBO;
  1541. }
  1542. }
  1543. // If one argument is a shuffle within one vector and the other is a constant,
  1544. // try moving the shuffle after the binary operation. This canonicalization
  1545. // intends to move shuffles closer to other shuffles and binops closer to
  1546. // other binops, so they can be folded. It may also enable demanded elements
  1547. // transforms.
  1548. Constant *C;
  1549. auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
  1550. if (InstVTy &&
  1551. match(&Inst,
  1552. m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
  1553. m_ImmConstant(C))) &&
  1554. cast<FixedVectorType>(V1->getType())->getNumElements() <=
  1555. InstVTy->getNumElements()) {
  1556. assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
  1557. "Shuffle should not change scalar type");
  1558. // Find constant NewC that has property:
  1559. // shuffle(NewC, ShMask) = C
  1560. // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
  1561. // reorder is not possible. A 1-to-1 mapping is not required. Example:
  1562. // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
  1563. bool ConstOp1 = isa<Constant>(RHS);
  1564. ArrayRef<int> ShMask = Mask;
  1565. unsigned SrcVecNumElts =
  1566. cast<FixedVectorType>(V1->getType())->getNumElements();
  1567. UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
  1568. SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
  1569. bool MayChange = true;
  1570. unsigned NumElts = InstVTy->getNumElements();
  1571. for (unsigned I = 0; I < NumElts; ++I) {
  1572. Constant *CElt = C->getAggregateElement(I);
  1573. if (ShMask[I] >= 0) {
  1574. assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
  1575. Constant *NewCElt = NewVecC[ShMask[I]];
  1576. // Bail out if:
  1577. // 1. The constant vector contains a constant expression.
  1578. // 2. The shuffle needs an element of the constant vector that can't
  1579. // be mapped to a new constant vector.
  1580. // 3. This is a widening shuffle that copies elements of V1 into the
  1581. // extended elements (extending with undef is allowed).
  1582. if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
  1583. I >= SrcVecNumElts) {
  1584. MayChange = false;
  1585. break;
  1586. }
  1587. NewVecC[ShMask[I]] = CElt;
  1588. }
  1589. // If this is a widening shuffle, we must be able to extend with undef
  1590. // elements. If the original binop does not produce an undef in the high
  1591. // lanes, then this transform is not safe.
  1592. // Similarly for undef lanes due to the shuffle mask, we can only
  1593. // transform binops that preserve undef.
  1594. // TODO: We could shuffle those non-undef constant values into the
  1595. // result by using a constant vector (rather than an undef vector)
  1596. // as operand 1 of the new binop, but that might be too aggressive
  1597. // for target-independent shuffle creation.
  1598. if (I >= SrcVecNumElts || ShMask[I] < 0) {
  1599. Constant *MaybeUndef =
  1600. ConstOp1
  1601. ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL)
  1602. : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL);
  1603. if (!MaybeUndef || !match(MaybeUndef, m_Undef())) {
  1604. MayChange = false;
  1605. break;
  1606. }
  1607. }
  1608. }
  1609. if (MayChange) {
  1610. Constant *NewC = ConstantVector::get(NewVecC);
  1611. // It may not be safe to execute a binop on a vector with undef elements
  1612. // because the entire instruction can be folded to undef or create poison
  1613. // that did not exist in the original code.
  1614. if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
  1615. NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
  1616. // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
  1617. // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
  1618. Value *NewLHS = ConstOp1 ? V1 : NewC;
  1619. Value *NewRHS = ConstOp1 ? NewC : V1;
  1620. return createBinOpShuffle(NewLHS, NewRHS, Mask);
  1621. }
  1622. }
  1623. // Try to reassociate to sink a splat shuffle after a binary operation.
  1624. if (Inst.isAssociative() && Inst.isCommutative()) {
  1625. // Canonicalize shuffle operand as LHS.
  1626. if (isa<ShuffleVectorInst>(RHS))
  1627. std::swap(LHS, RHS);
  1628. Value *X;
  1629. ArrayRef<int> MaskC;
  1630. int SplatIndex;
  1631. Value *Y, *OtherOp;
  1632. if (!match(LHS,
  1633. m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
  1634. !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
  1635. X->getType() != Inst.getType() ||
  1636. !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
  1637. return nullptr;
  1638. // FIXME: This may not be safe if the analysis allows undef elements. By
  1639. // moving 'Y' before the splat shuffle, we are implicitly assuming
  1640. // that it is not undef/poison at the splat index.
  1641. if (isSplatValue(OtherOp, SplatIndex)) {
  1642. std::swap(Y, OtherOp);
  1643. } else if (!isSplatValue(Y, SplatIndex)) {
  1644. return nullptr;
  1645. }
  1646. // X and Y are splatted values, so perform the binary operation on those
  1647. // values followed by a splat followed by the 2nd binary operation:
  1648. // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
  1649. Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
  1650. SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
  1651. Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
  1652. Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
  1653. // Intersect FMF on both new binops. Other (poison-generating) flags are
  1654. // dropped to be safe.
  1655. if (isa<FPMathOperator>(R)) {
  1656. R->copyFastMathFlags(&Inst);
  1657. R->andIRFlags(RHS);
  1658. }
  1659. if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
  1660. NewInstBO->copyIRFlags(R);
  1661. return R;
  1662. }
  1663. return nullptr;
  1664. }
1665. /// Try to narrow the width of a binop if at least 1 operand is an extend
1666. /// of a value. This requires a potentially expensive known bits check to make
  1667. /// sure the narrow op does not overflow.
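/// Illustrative example (not from the original comments):
///   %a = zext i8 %x to i32
///   %b = zext i8 %y to i32
///   %r = add i32 %a, %b
/// becomes, when the narrow add is known not to overflow:
///   %n = add nuw i8 %x, %y
///   %r = zext i8 %n to i32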
  1668. Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
  1669. // We need at least one extended operand.
  1670. Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
  1671. // If this is a sub, we swap the operands since we always want an extension
  1672. // on the RHS. The LHS can be an extension or a constant.
  1673. if (BO.getOpcode() == Instruction::Sub)
  1674. std::swap(Op0, Op1);
  1675. Value *X;
  1676. bool IsSext = match(Op0, m_SExt(m_Value(X)));
  1677. if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
  1678. return nullptr;
  1679. // If both operands are the same extension from the same source type and we
  1680. // can eliminate at least one (hasOneUse), this might work.
  1681. CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
  1682. Value *Y;
  1683. if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
  1684. cast<Operator>(Op1)->getOpcode() == CastOpc &&
  1685. (Op0->hasOneUse() || Op1->hasOneUse()))) {
  1686. // If that did not match, see if we have a suitable constant operand.
  1687. // Truncating and extending must produce the same constant.
  1688. Constant *WideC;
  1689. if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
  1690. return nullptr;
  1691. Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
  1692. if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
  1693. return nullptr;
  1694. Y = NarrowC;
  1695. }
  1696. // Swap back now that we found our operands.
  1697. if (BO.getOpcode() == Instruction::Sub)
  1698. std::swap(X, Y);
  1699. // Both operands have narrow versions. Last step: the math must not overflow
  1700. // in the narrow width.
  1701. if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
  1702. return nullptr;
  1703. // bo (ext X), (ext Y) --> ext (bo X, Y)
  1704. // bo (ext X), C --> ext (bo X, C')
  1705. Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
  1706. if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
  1707. if (IsSext)
  1708. NewBinOp->setHasNoSignedWrap();
  1709. else
  1710. NewBinOp->setHasNoUnsignedWrap();
  1711. }
  1712. return CastInst::Create(CastOpc, NarrowBO, BO.getType());
  1713. }
  1714. static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
  1715. // At least one GEP must be inbounds.
  1716. if (!GEP1.isInBounds() && !GEP2.isInBounds())
  1717. return false;
  1718. return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
  1719. (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
  1720. }
  1721. /// Thread a GEP operation with constant indices through the constant true/false
  1722. /// arms of a select.
  1723. static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
  1724. InstCombiner::BuilderTy &Builder) {
  1725. if (!GEP.hasAllConstantIndices())
  1726. return nullptr;
  1727. Instruction *Sel;
  1728. Value *Cond;
  1729. Constant *TrueC, *FalseC;
  1730. if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
  1731. !match(Sel,
  1732. m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
  1733. return nullptr;
  1734. // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
  1735. // Propagate 'inbounds' and metadata from existing instructions.
  1736. // Note: using IRBuilder to create the constants for efficiency.
  1737. SmallVector<Value *, 4> IndexC(GEP.indices());
  1738. bool IsInBounds = GEP.isInBounds();
  1739. Type *Ty = GEP.getSourceElementType();
  1740. Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
  1741. Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
  1742. return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
  1743. }
  1744. Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
  1745. GEPOperator *Src) {
  1746. // Combine Indices - If the source pointer to this getelementptr instruction
  1747. // is a getelementptr instruction with matching element type, combine the
  1748. // indices of the two getelementptr instructions into a single instruction.
  1749. if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
  1750. return nullptr;
  1751. if (Src->getResultElementType() == GEP.getSourceElementType() &&
  1752. Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
  1753. Src->hasOneUse()) {
  1754. Value *GO1 = GEP.getOperand(1);
  1755. Value *SO1 = Src->getOperand(1);
  1756. if (LI) {
  1757. // Try to reassociate loop invariant GEP chains to enable LICM.
  1758. if (Loop *L = LI->getLoopFor(GEP.getParent())) {
  1759. // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
  1760. // invariant: this breaks the dependence between GEPs and allows LICM
  1761. // to hoist the invariant part out of the loop.
  1762. if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
  1763. // The swapped GEPs are inbounds if both original GEPs are inbounds
  1764. // and the sign of the offsets is the same. For simplicity, only
  1765. // handle both offsets being non-negative.
  1766. bool IsInBounds = Src->isInBounds() && GEP.isInBounds() &&
  1767. isKnownNonNegative(SO1, DL, 0, &AC, &GEP, &DT) &&
  1768. isKnownNonNegative(GO1, DL, 0, &AC, &GEP, &DT);
  1769. // Put NewSrc at same location as %src.
  1770. Builder.SetInsertPoint(cast<Instruction>(Src));
  1771. Value *NewSrc = Builder.CreateGEP(GEP.getSourceElementType(),
  1772. Src->getPointerOperand(), GO1,
  1773. Src->getName(), IsInBounds);
  1774. GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
  1775. GEP.getSourceElementType(), NewSrc, {SO1});
  1776. NewGEP->setIsInBounds(IsInBounds);
  1777. return NewGEP;
  1778. }
  1779. }
  1780. }
  1781. }
  1782. // Note that if our source is a gep chain itself then we wait for that
  1783. // chain to be resolved before we perform this transformation. This
  1784. // avoids us creating a TON of code in some cases.
  1785. if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
  1786. if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
  1787. return nullptr; // Wait until our source is folded to completion.
  1788. // For constant GEPs, use a more general offset-based folding approach.
  1789. // Only do this for opaque pointers, as the result element type may change.
  1790. Type *PtrTy = Src->getType()->getScalarType();
  1791. if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() &&
  1792. (Src->hasOneUse() || Src->hasAllConstantIndices())) {
  1793. // Split Src into a variable part and a constant suffix.
  1794. gep_type_iterator GTI = gep_type_begin(*Src);
  1795. Type *BaseType = GTI.getIndexedType();
  1796. bool IsFirstType = true;
  1797. unsigned NumVarIndices = 0;
  1798. for (auto Pair : enumerate(Src->indices())) {
  1799. if (!isa<ConstantInt>(Pair.value())) {
  1800. BaseType = GTI.getIndexedType();
  1801. IsFirstType = false;
  1802. NumVarIndices = Pair.index() + 1;
  1803. }
  1804. ++GTI;
  1805. }
  1806. // Determine the offset for the constant suffix of Src.
  1807. APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
  1808. if (NumVarIndices != Src->getNumIndices()) {
1809. // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
  1810. if (isa<ScalableVectorType>(BaseType))
  1811. return nullptr;
  1812. SmallVector<Value *> ConstantIndices;
  1813. if (!IsFirstType)
  1814. ConstantIndices.push_back(
  1815. Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
  1816. append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
  1817. Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
  1818. }
  1819. // Add the offset for GEP (which is fully constant).
  1820. if (!GEP.accumulateConstantOffset(DL, Offset))
  1821. return nullptr;
  1822. APInt OffsetOld = Offset;
  1823. // Convert the total offset back into indices.
  1824. SmallVector<APInt> ConstIndices =
  1825. DL.getGEPIndicesForOffset(BaseType, Offset);
  1826. if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
1827. // If both GEPs are constant-indexed and cannot be merged in either way,
  1828. // convert them to a GEP of i8.
  1829. if (Src->hasAllConstantIndices())
  1830. return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
  1831. ? GetElementPtrInst::CreateInBounds(
  1832. Builder.getInt8Ty(), Src->getOperand(0),
  1833. Builder.getInt(OffsetOld), GEP.getName())
  1834. : GetElementPtrInst::Create(
  1835. Builder.getInt8Ty(), Src->getOperand(0),
  1836. Builder.getInt(OffsetOld), GEP.getName());
  1837. return nullptr;
  1838. }
  1839. bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
  1840. SmallVector<Value *> Indices;
  1841. append_range(Indices, drop_end(Src->indices(),
  1842. Src->getNumIndices() - NumVarIndices));
  1843. for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
  1844. Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
  1845. // Even if the total offset is inbounds, we may end up representing it
  1846. // by first performing a larger negative offset, and then a smaller
  1847. // positive one. The large negative offset might go out of bounds. Only
  1848. // preserve inbounds if all signs are the same.
  1849. IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
  1850. }
  1851. return IsInBounds
  1852. ? GetElementPtrInst::CreateInBounds(Src->getSourceElementType(),
  1853. Src->getOperand(0), Indices,
  1854. GEP.getName())
  1855. : GetElementPtrInst::Create(Src->getSourceElementType(),
  1856. Src->getOperand(0), Indices,
  1857. GEP.getName());
  1858. }
  1859. if (Src->getResultElementType() != GEP.getSourceElementType())
  1860. return nullptr;
  1861. SmallVector<Value*, 8> Indices;
  1862. // Find out whether the last index in the source GEP is a sequential idx.
  1863. bool EndsWithSequential = false;
  1864. for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
  1865. I != E; ++I)
  1866. EndsWithSequential = I.isSequential();
  1867. // Can we combine the two pointer arithmetics offsets?
  1868. if (EndsWithSequential) {
  1869. // Replace: gep (gep %P, long B), long A, ...
  1870. // With: T = long A+B; gep %P, T, ...
  1871. Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
  1872. Value *GO1 = GEP.getOperand(1);
  1873. // If they aren't the same type, then the input hasn't been processed
  1874. // by the loop above yet (which canonicalizes sequential index types to
  1875. // intptr_t). Just avoid transforming this until the input has been
  1876. // normalized.
  1877. if (SO1->getType() != GO1->getType())
  1878. return nullptr;
  1879. Value *Sum =
  1880. simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
  1881. // Only do the combine when we are sure the cost after the
  1882. // merge is never more than that before the merge.
  1883. if (Sum == nullptr)
  1884. return nullptr;
  1885. // Update the GEP in place if possible.
  1886. if (Src->getNumOperands() == 2) {
  1887. GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
  1888. replaceOperand(GEP, 0, Src->getOperand(0));
  1889. replaceOperand(GEP, 1, Sum);
  1890. return &GEP;
  1891. }
  1892. Indices.append(Src->op_begin()+1, Src->op_end()-1);
  1893. Indices.push_back(Sum);
  1894. Indices.append(GEP.op_begin()+2, GEP.op_end());
  1895. } else if (isa<Constant>(*GEP.idx_begin()) &&
  1896. cast<Constant>(*GEP.idx_begin())->isNullValue() &&
  1897. Src->getNumOperands() != 1) {
  1898. // Otherwise we can do the fold if the first index of the GEP is a zero
  1899. Indices.append(Src->op_begin()+1, Src->op_end());
  1900. Indices.append(GEP.idx_begin()+1, GEP.idx_end());
  1901. }
  1902. if (!Indices.empty())
  1903. return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
  1904. ? GetElementPtrInst::CreateInBounds(
  1905. Src->getSourceElementType(), Src->getOperand(0), Indices,
  1906. GEP.getName())
  1907. : GetElementPtrInst::Create(Src->getSourceElementType(),
  1908. Src->getOperand(0), Indices,
  1909. GEP.getName());
  1910. return nullptr;
  1911. }
  1912. // Note that we may have also stripped an address space cast in between.
  1913. Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
  1914. GetElementPtrInst &GEP) {
  1915. // With opaque pointers, there is no pointer element type we can use to
  1916. // adjust the GEP type.
  1917. PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
  1918. if (SrcType->isOpaque())
  1919. return nullptr;
  1920. Type *GEPEltType = GEP.getSourceElementType();
  1921. Type *SrcEltType = SrcType->getNonOpaquePointerElementType();
  1922. Value *SrcOp = BCI->getOperand(0);
  1923. // GEP directly using the source operand if this GEP is accessing an element
  1924. // of a bitcasted pointer to vector or array of the same dimensions:
  1925. // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
  1926. // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
  1927. auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
  1928. const DataLayout &DL) {
  1929. auto *VecVTy = cast<FixedVectorType>(VecTy);
  1930. return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
  1931. ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
  1932. DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
  1933. };
  1934. if (GEP.getNumOperands() == 3 &&
  1935. ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
  1936. areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
  1937. (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
  1938. areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
1939. // Create a new GEP here, as using `setOperand()` followed by
1940. // `setSourceElementType()` won't actually update the type of the
1941. // existing GEP Value, which causes issues if this Value is accessed when
1942. // constructing an AddrSpaceCastInst.
  1943. SmallVector<Value *, 8> Indices(GEP.indices());
  1944. Value *NGEP =
  1945. Builder.CreateGEP(SrcEltType, SrcOp, Indices, "", GEP.isInBounds());
  1946. NGEP->takeName(&GEP);
  1947. // Preserve GEP address space to satisfy users
  1948. if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
  1949. return new AddrSpaceCastInst(NGEP, GEP.getType());
  1950. return replaceInstUsesWith(GEP, NGEP);
  1951. }
  1952. // See if we can simplify:
  1953. // X = bitcast A* to B*
  1954. // Y = gep X, <...constant indices...>
  1955. // into a gep of the original struct. This is important for SROA and alias
  1956. // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
  1957. unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEP.getType());
  1958. APInt Offset(OffsetBits, 0);
1959. // If the bitcast argument is an allocation, the bitcast is a conversion to
1960. // the actual type of the allocation. Removing such bitcasts results in GEPs
1961. // with an i8* base and pure byte offsets, which means the GEP is not aware of
1962. // the struct or array hierarchy.
1963. // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
1964. // a better chance to succeed.
  1965. if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
  1966. !isAllocationFn(SrcOp, &TLI)) {
  1967. // If this GEP instruction doesn't move the pointer, just replace the GEP
  1968. // with a bitcast of the real input to the dest type.
  1969. if (!Offset) {
  1970. // If the bitcast is of an allocation, and the allocation will be
  1971. // converted to match the type of the cast, don't touch this.
  1972. if (isa<AllocaInst>(SrcOp)) {
  1973. // See if the bitcast simplifies, if so, don't nuke this GEP yet.
  1974. if (Instruction *I = visitBitCast(*BCI)) {
  1975. if (I != BCI) {
  1976. I->takeName(BCI);
  1977. I->insertInto(BCI->getParent(), BCI->getIterator());
  1978. replaceInstUsesWith(*BCI, I);
  1979. }
  1980. return &GEP;
  1981. }
  1982. }
  1983. if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
  1984. return new AddrSpaceCastInst(SrcOp, GEP.getType());
  1985. return new BitCastInst(SrcOp, GEP.getType());
  1986. }
  1987. // Otherwise, if the offset is non-zero, we need to find out if there is a
  1988. // field at Offset in 'A's type. If so, we can pull the cast through the
  1989. // GEP.
  1990. SmallVector<Value *, 8> NewIndices;
  1991. if (findElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices, DL)) {
  1992. Value *NGEP = Builder.CreateGEP(SrcEltType, SrcOp, NewIndices, "",
  1993. GEP.isInBounds());
  1994. if (NGEP->getType() == GEP.getType())
  1995. return replaceInstUsesWith(GEP, NGEP);
  1996. NGEP->takeName(&GEP);
  1997. if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
  1998. return new AddrSpaceCastInst(NGEP, GEP.getType());
  1999. return new BitCastInst(NGEP, GEP.getType());
  2000. }
  2001. }
  2002. return nullptr;
  2003. }
  2004. Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  2005. Value *PtrOp = GEP.getOperand(0);
  2006. SmallVector<Value *, 8> Indices(GEP.indices());
  2007. Type *GEPType = GEP.getType();
  2008. Type *GEPEltType = GEP.getSourceElementType();
  2009. bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
  2010. if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
  2011. SQ.getWithInstruction(&GEP)))
  2012. return replaceInstUsesWith(GEP, V);
  2013. // For vector geps, use the generic demanded vector support.
  2014. // Skip if GEP return type is scalable. The number of elements is unknown at
  2015. // compile-time.
  2016. if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
  2017. auto VWidth = GEPFVTy->getNumElements();
  2018. APInt UndefElts(VWidth, 0);
  2019. APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
  2020. if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
  2021. UndefElts)) {
  2022. if (V != &GEP)
  2023. return replaceInstUsesWith(GEP, V);
  2024. return &GEP;
  2025. }
  2026. // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
  2027. // possible (decide on canonical form for pointer broadcast), 3) exploit
  2028. // undef elements to decrease demanded bits
  2029. }
  2030. // Eliminate unneeded casts for indices, and replace indices which displace
  2031. // by multiples of a zero size type with zero.
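// For illustration (hypothetical values, assuming a DataLayout whose index
// type for this address space is i64):
//   %p = getelementptr i32, i32* %base, i16 %i
// is canonicalized to
//   %i.ext = sext i16 %i to i64
//   %p = getelementptr i32, i32* %base, i64 %i.ext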
  2032. bool MadeChange = false;
  2033. // Index width may not be the same width as pointer width.
  2034. // Data layout chooses the right type based on supported integer types.
  2035. Type *NewScalarIndexTy =
  2036. DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
  2037. gep_type_iterator GTI = gep_type_begin(GEP);
  2038. for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
  2039. ++I, ++GTI) {
  2040. // Skip indices into struct types.
  2041. if (GTI.isStruct())
  2042. continue;
  2043. Type *IndexTy = (*I)->getType();
  2044. Type *NewIndexType =
  2045. IndexTy->isVectorTy()
  2046. ? VectorType::get(NewScalarIndexTy,
  2047. cast<VectorType>(IndexTy)->getElementCount())
  2048. : NewScalarIndexTy;
  2049. // If the element type has zero size then any index over it is equivalent
  2050. // to an index of zero, so replace it with zero if it is not zero already.
  2051. Type *EltTy = GTI.getIndexedType();
  2052. if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
  2053. if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
  2054. *I = Constant::getNullValue(NewIndexType);
  2055. MadeChange = true;
  2056. }
  2057. if (IndexTy != NewIndexType) {
  2058. // If we are using a wider index than needed for this platform, shrink
  2059. // it to what we need. If narrower, sign-extend it to what we need.
  2060. // This explicit cast can make subsequent optimizations more obvious.
  2061. *I = Builder.CreateIntCast(*I, NewIndexType, true);
  2062. MadeChange = true;
  2063. }
  2064. }
  2065. if (MadeChange)
  2066. return &GEP;
  2067. // Check to see if the inputs to the PHI node are getelementptr instructions.
  2068. if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
  2069. auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
  2070. if (!Op1)
  2071. return nullptr;
  2072. // Don't fold a GEP into itself through a PHI node. This can only happen
  2073. // through the back-edge of a loop. Folding a GEP into itself means that
  2074. // the value of the previous iteration needs to be stored in the meantime,
  2075. // thus requiring an additional register variable to be live, but not
  2076. // actually achieving anything (the GEP still needs to be executed once per
  2077. // loop iteration).
  2078. if (Op1 == &GEP)
  2079. return nullptr;
  2080. int DI = -1;
for (auto I = PN->op_begin() + 1, E = PN->op_end(); I != E; ++I) {
  2082. auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
  2083. if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
  2084. Op1->getSourceElementType() != Op2->getSourceElementType())
  2085. return nullptr;
  2086. // As for Op1 above, don't try to fold a GEP into itself.
  2087. if (Op2 == &GEP)
  2088. return nullptr;
  2089. // Keep track of the type as we walk the GEP.
  2090. Type *CurTy = nullptr;
  2091. for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
  2092. if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
  2093. return nullptr;
  2094. if (Op1->getOperand(J) != Op2->getOperand(J)) {
  2095. if (DI == -1) {
// We have not yet seen any differences in the GEPs feeding the
// PHI, so record this one if it is allowed to be a variable.
// The first two arguments can vary for any GEP; the rest have to be
// static for struct slots.
  2101. if (J > 1) {
  2102. assert(CurTy && "No current type?");
  2103. if (CurTy->isStructTy())
  2104. return nullptr;
  2105. }
  2106. DI = J;
  2107. } else {
  2108. // The GEP is different by more than one input. While this could be
  2109. // extended to support GEPs that vary by more than one variable it
  2110. // doesn't make sense since it greatly increases the complexity and
  2111. // would result in an R+R+R addressing mode which no backend
  2112. // directly supports and would need to be broken into several
  2113. // simpler instructions anyway.
  2114. return nullptr;
  2115. }
  2116. }
  2117. // Sink down a layer of the type for the next iteration.
  2118. if (J > 0) {
  2119. if (J == 1) {
  2120. CurTy = Op1->getSourceElementType();
  2121. } else {
  2122. CurTy =
  2123. GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
  2124. }
  2125. }
  2126. }
  2127. }
  2128. // If not all GEPs are identical we'll have to create a new PHI node.
  2129. // Check that the old PHI node has only one use so that it will get
  2130. // removed.
  2131. if (DI != -1 && !PN->hasOneUse())
  2132. return nullptr;
  2133. auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
  2134. if (DI == -1) {
  2135. // All the GEPs feeding the PHI are identical. Clone one down into our
  2136. // BB so that it can be merged with the current GEP.
  2137. } else {
  2138. // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
  2139. // into the current block so it can be merged, and create a new PHI to
  2140. // set that index.
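// For illustration (hypothetical names), with incoming GEPs that differ
// only in their last index:
//   %g1 = getelementptr i32, i32* %base, i64 %i ; in %pred1
//   %g2 = getelementptr i32, i32* %base, i64 %j ; in %pred2
//   %p = phi i32* [ %g1, %pred1 ], [ %g2, %pred2 ]
// a new phi of the differing index (%i / %j) is built and used as operand
// DI of the cloned GEP.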
  2141. PHINode *NewPN;
  2142. {
  2143. IRBuilderBase::InsertPointGuard Guard(Builder);
  2144. Builder.SetInsertPoint(PN);
  2145. NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
  2146. PN->getNumOperands());
  2147. }
  2148. for (auto &I : PN->operands())
  2149. NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
  2150. PN->getIncomingBlock(I));
  2151. NewGEP->setOperand(DI, NewPN);
  2152. }
  2153. NewGEP->insertInto(GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
  2154. return replaceOperand(GEP, 0, NewGEP);
  2155. }
  2156. if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
  2157. if (Instruction *I = visitGEPOfGEP(GEP, Src))
  2158. return I;
  2159. // Skip if GEP source element type is scalable. The type alloc size is unknown
  2160. // at compile-time.
  2161. if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
  2162. unsigned AS = GEP.getPointerAddressSpace();
  2163. if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
  2164. DL.getIndexSizeInBits(AS)) {
  2165. uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
  2166. bool Matched = false;
  2167. uint64_t C;
  2168. Value *V = nullptr;
  2169. if (TyAllocSize == 1) {
  2170. V = GEP.getOperand(1);
  2171. Matched = true;
  2172. } else if (match(GEP.getOperand(1),
  2173. m_AShr(m_Value(V), m_ConstantInt(C)))) {
  2174. if (TyAllocSize == 1ULL << C)
  2175. Matched = true;
  2176. } else if (match(GEP.getOperand(1),
  2177. m_SDiv(m_Value(V), m_ConstantInt(C)))) {
  2178. if (TyAllocSize == C)
  2179. Matched = true;
  2180. }
  2181. // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
  2182. // only if both point to the same underlying object (otherwise provenance
  2183. // is not necessarily retained).
  2184. Value *Y;
  2185. Value *X = GEP.getOperand(0);
  2186. if (Matched &&
  2187. match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
  2188. getUnderlyingObject(X) == getUnderlyingObject(Y))
  2189. return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
  2190. }
  2191. }
  2192. // We do not handle pointer-vector geps here.
  2193. if (GEPType->isVectorTy())
  2194. return nullptr;
  2195. // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
  2196. Value *StrippedPtr = PtrOp->stripPointerCasts();
  2197. PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
  2198. // TODO: The basic approach of these folds is not compatible with opaque
  2199. // pointers, because we can't use bitcasts as a hint for a desirable GEP
  2200. // type. Instead, we should perform canonicalization directly on the GEP
  2201. // type. For now, skip these.
  2202. if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) {
  2203. bool HasZeroPointerIndex = false;
  2204. Type *StrippedPtrEltTy = StrippedPtrTy->getNonOpaquePointerElementType();
  2205. if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
  2206. HasZeroPointerIndex = C->isZero();
  2207. // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
  2208. // into : GEP [10 x i8]* X, i32 0, ...
  2209. //
  2210. // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
  2211. // into : GEP i8* X, ...
  2212. //
  2213. // This occurs when the program declares an array extern like "int X[];"
  2214. if (HasZeroPointerIndex) {
  2215. if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
  2216. // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
  2217. if (CATy->getElementType() == StrippedPtrEltTy) {
  2218. // -> GEP i8* X, ...
  2219. SmallVector<Value *, 8> Idx(drop_begin(GEP.indices()));
  2220. GetElementPtrInst *Res = GetElementPtrInst::Create(
  2221. StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
  2222. Res->setIsInBounds(GEP.isInBounds());
  2223. if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
  2224. return Res;
  2225. // Insert Res, and create an addrspacecast.
  2226. // e.g.,
  2227. // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
  2228. // ->
  2229. // %0 = GEP i8 addrspace(1)* X, ...
  2230. // addrspacecast i8 addrspace(1)* %0 to i8*
  2231. return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
  2232. }
  2233. if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
  2234. // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
  2235. if (CATy->getElementType() == XATy->getElementType()) {
  2236. // -> GEP [10 x i8]* X, i32 0, ...
  2237. // At this point, we know that the cast source type is a pointer
  2238. // to an array of the same type as the destination pointer
  2239. // array. Because the array type is never stepped over (there
  2240. // is a leading zero) we can fold the cast into this GEP.
  2241. if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
  2242. GEP.setSourceElementType(XATy);
  2243. return replaceOperand(GEP, 0, StrippedPtr);
  2244. }
  2245. // Cannot replace the base pointer directly because StrippedPtr's
  2246. // address space is different. Instead, create a new GEP followed by
  2247. // an addrspacecast.
  2248. // e.g.,
  2249. // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
  2250. // i32 0, ...
  2251. // ->
  2252. // %0 = GEP [10 x i8] addrspace(1)* X, ...
  2253. // addrspacecast i8 addrspace(1)* %0 to i8*
  2254. SmallVector<Value *, 8> Idx(GEP.indices());
  2255. Value *NewGEP =
  2256. Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
  2257. GEP.getName(), GEP.isInBounds());
  2258. return new AddrSpaceCastInst(NewGEP, GEPType);
  2259. }
  2260. }
  2261. }
  2262. } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
  2263. // Skip if GEP source element type is scalable. The type alloc size is
  2264. // unknown at compile-time.
// Transform things like:
//   %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
// into:
//   %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
  2268. if (StrippedPtrEltTy->isArrayTy() &&
  2269. DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
  2270. DL.getTypeAllocSize(GEPEltType)) {
  2271. Type *IdxType = DL.getIndexType(GEPType);
  2272. Value *Idx[2] = {Constant::getNullValue(IdxType), GEP.getOperand(1)};
  2273. Value *NewGEP = Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
  2274. GEP.getName(), GEP.isInBounds());
  2275. // V and GEP are both pointer types --> BitCast
  2276. return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
  2277. }
  2278. // Transform things like:
  2279. // %V = mul i64 %N, 4
  2280. // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
  2281. // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
  2282. if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
  2283. // Check that changing the type amounts to dividing the index by a scale
  2284. // factor.
  2285. uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
  2286. uint64_t SrcSize =
  2287. DL.getTypeAllocSize(StrippedPtrEltTy).getFixedValue();
  2288. if (ResSize && SrcSize % ResSize == 0) {
  2289. Value *Idx = GEP.getOperand(1);
  2290. unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
  2291. uint64_t Scale = SrcSize / ResSize;
  2292. // Earlier transforms ensure that the index has the right type
  2293. // according to Data Layout, which considerably simplifies the
  2294. // logic by eliminating implicit casts.
  2295. assert(Idx->getType() == DL.getIndexType(GEPType) &&
  2296. "Index type does not match the Data Layout preferences");
  2297. bool NSW;
  2298. if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
  2299. // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
  2300. // If the multiplication NewIdx * Scale may overflow then the new
  2301. // GEP may not be "inbounds".
  2302. Value *NewGEP =
  2303. Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
  2304. GEP.getName(), GEP.isInBounds() && NSW);
  2305. // The NewGEP must be pointer typed, so must the old one -> BitCast
  2306. return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
  2307. GEPType);
  2308. }
  2309. }
  2310. }
  2311. // Similarly, transform things like:
  2312. // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
  2313. // (where tmp = 8*tmp2) into:
  2314. // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
  2315. if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
  2316. StrippedPtrEltTy->isArrayTy()) {
  2317. // Check that changing to the array element type amounts to dividing the
  2318. // index by a scale factor.
  2319. uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
  2320. uint64_t ArrayEltSize =
  2321. DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
  2322. .getFixedValue();
  2323. if (ResSize && ArrayEltSize % ResSize == 0) {
  2324. Value *Idx = GEP.getOperand(1);
  2325. unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
  2326. uint64_t Scale = ArrayEltSize / ResSize;
  2327. // Earlier transforms ensure that the index has the right type
  2328. // according to the Data Layout, which considerably simplifies
  2329. // the logic by eliminating implicit casts.
  2330. assert(Idx->getType() == DL.getIndexType(GEPType) &&
  2331. "Index type does not match the Data Layout preferences");
  2332. bool NSW;
  2333. if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
  2334. // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
  2335. // If the multiplication NewIdx * Scale may overflow then the new
  2336. // GEP may not be "inbounds".
  2337. Type *IndTy = DL.getIndexType(GEPType);
  2338. Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
  2339. Value *NewGEP =
  2340. Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
  2341. GEP.getName(), GEP.isInBounds() && NSW);
  2342. // The NewGEP must be pointer typed, so must the old one -> BitCast
  2343. return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
  2344. GEPType);
  2345. }
  2346. }
  2347. }
  2348. }
  2349. }
  2350. // addrspacecast between types is canonicalized as a bitcast, then an
  2351. // addrspacecast. To take advantage of the below bitcast + struct GEP, look
  2352. // through the addrspacecast.
  2353. Value *ASCStrippedPtrOp = PtrOp;
  2354. if (auto *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
  2355. // X = bitcast A addrspace(1)* to B addrspace(1)*
  2356. // Y = addrspacecast A addrspace(1)* to B addrspace(2)*
  2357. // Z = gep Y, <...constant indices...>
  2358. // Into an addrspacecasted GEP of the struct.
  2359. if (auto *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
  2360. ASCStrippedPtrOp = BC;
  2361. }
  2362. if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp))
  2363. if (Instruction *I = visitGEPOfBitcast(BCI, GEP))
  2364. return I;
  2365. if (!GEP.isInBounds()) {
  2366. unsigned IdxWidth =
  2367. DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
  2368. APInt BasePtrOffset(IdxWidth, 0);
  2369. Value *UnderlyingPtrOp =
  2370. PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
  2371. BasePtrOffset);
  2372. if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
  2373. if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
  2374. BasePtrOffset.isNonNegative()) {
  2375. APInt AllocSize(
  2376. IdxWidth,
  2377. DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinValue());
  2378. if (BasePtrOffset.ule(AllocSize)) {
  2379. return GetElementPtrInst::CreateInBounds(
  2380. GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
  2381. }
  2382. }
  2383. }
  2384. }
  2385. if (Instruction *R = foldSelectGEP(GEP, Builder))
  2386. return R;
  2387. return nullptr;
  2388. }
  2389. static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
  2390. Instruction *AI) {
  2391. if (isa<ConstantPointerNull>(V))
  2392. return true;
  2393. if (auto *LI = dyn_cast<LoadInst>(V))
  2394. return isa<GlobalVariable>(LI->getPointerOperand());
  2395. // Two distinct allocations will never be equal.
  2396. return isAllocLikeFn(V, &TLI) && V != AI;
  2397. }
/// Given a call CB which uses an address UsedV, return true if we can prove
/// the call's only possible effect is storing to UsedV.
  2400. static bool isRemovableWrite(CallBase &CB, Value *UsedV,
  2401. const TargetLibraryInfo &TLI) {
  2402. if (!CB.use_empty())
  2403. // TODO: add recursion if returned attribute is present
  2404. return false;
  2405. if (CB.isTerminator())
  2406. // TODO: remove implementation restriction
  2407. return false;
  2408. if (!CB.willReturn() || !CB.doesNotThrow())
  2409. return false;
  2410. // If the only possible side effect of the call is writing to the alloca,
  2411. // and the result isn't used, we can safely remove any reads implied by the
  2412. // call including those which might read the alloca itself.
  2413. std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
  2414. return Dest && Dest->Ptr == UsedV;
  2415. }
  2416. static bool isAllocSiteRemovable(Instruction *AI,
  2417. SmallVectorImpl<WeakTrackingVH> &Users,
  2418. const TargetLibraryInfo &TLI) {
  2419. SmallVector<Instruction*, 4> Worklist;
  2420. const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
  2421. Worklist.push_back(AI);
  2422. do {
  2423. Instruction *PI = Worklist.pop_back_val();
  2424. for (User *U : PI->users()) {
  2425. Instruction *I = cast<Instruction>(U);
  2426. switch (I->getOpcode()) {
  2427. default:
  2428. // Give up the moment we see something we can't handle.
  2429. return false;
  2430. case Instruction::AddrSpaceCast:
  2431. case Instruction::BitCast:
  2432. case Instruction::GetElementPtr:
  2433. Users.emplace_back(I);
  2434. Worklist.push_back(I);
  2435. continue;
  2436. case Instruction::ICmp: {
  2437. ICmpInst *ICI = cast<ICmpInst>(I);
  2438. // We can fold eq/ne comparisons with null to false/true, respectively.
  2439. // We also fold comparisons in some conditions provided the alloc has
  2440. // not escaped (see isNeverEqualToUnescapedAlloc).
  2441. if (!ICI->isEquality())
  2442. return false;
  2443. unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
  2444. if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
  2445. return false;
  2446. Users.emplace_back(I);
  2447. continue;
  2448. }
  2449. case Instruction::Call:
  2450. // Ignore no-op and store intrinsics.
  2451. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
  2452. switch (II->getIntrinsicID()) {
  2453. default:
  2454. return false;
  2455. case Intrinsic::memmove:
  2456. case Intrinsic::memcpy:
  2457. case Intrinsic::memset: {
  2458. MemIntrinsic *MI = cast<MemIntrinsic>(II);
  2459. if (MI->isVolatile() || MI->getRawDest() != PI)
  2460. return false;
  2461. [[fallthrough]];
  2462. }
  2463. case Intrinsic::assume:
  2464. case Intrinsic::invariant_start:
  2465. case Intrinsic::invariant_end:
  2466. case Intrinsic::lifetime_start:
  2467. case Intrinsic::lifetime_end:
  2468. case Intrinsic::objectsize:
  2469. Users.emplace_back(I);
  2470. continue;
  2471. case Intrinsic::launder_invariant_group:
  2472. case Intrinsic::strip_invariant_group:
  2473. Users.emplace_back(I);
  2474. Worklist.push_back(I);
  2475. continue;
  2476. }
  2477. }
  2478. if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
  2479. Users.emplace_back(I);
  2480. continue;
  2481. }
  2482. if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
  2483. getAllocationFamily(I, &TLI) == Family) {
  2484. assert(Family);
  2485. Users.emplace_back(I);
  2486. continue;
  2487. }
  2488. if (getReallocatedOperand(cast<CallBase>(I)) == PI &&
  2489. getAllocationFamily(I, &TLI) == Family) {
  2490. assert(Family);
  2491. Users.emplace_back(I);
  2492. Worklist.push_back(I);
  2493. continue;
  2494. }
  2495. return false;
  2496. case Instruction::Store: {
  2497. StoreInst *SI = cast<StoreInst>(I);
  2498. if (SI->isVolatile() || SI->getPointerOperand() != PI)
  2499. return false;
  2500. Users.emplace_back(I);
  2501. continue;
  2502. }
  2503. }
  2504. llvm_unreachable("missing a return?");
  2505. }
  2506. } while (!Worklist.empty());
  2507. return true;
  2508. }
  2509. Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
  2510. assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
// If we have a malloc call which is only used in any number of comparisons to
// null and in free calls, delete the calls and replace the comparisons with
// true or false as appropriate.
// This is based on the principle that we can substitute our own allocation
// function (which will never return null) rather than knowledge of the
// specific function being called. In some sense this can change the permitted
// outputs of a program (when we convert a malloc to an alloca, the fact that
// the allocation is now on the stack is potentially visible, for example),
// but we believe it does so in a permissible manner.
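// For illustration (hypothetical IR), a removable allocation site looks like:
//   %m = call i8* @malloc(i64 16)
//   %c = icmp eq i8* %m, null
//   call void @free(i8* %m)
// Here %c is replaced by 'false' and both calls are erased.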
  2520. SmallVector<WeakTrackingVH, 64> Users;
  2521. // If we are removing an alloca with a dbg.declare, insert dbg.value calls
  2522. // before each store.
  2523. SmallVector<DbgVariableIntrinsic *, 8> DVIs;
  2524. std::unique_ptr<DIBuilder> DIB;
  2525. if (isa<AllocaInst>(MI)) {
  2526. findDbgUsers(DVIs, &MI);
  2527. DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
  2528. }
  2529. if (isAllocSiteRemovable(&MI, Users, TLI)) {
  2530. for (unsigned i = 0, e = Users.size(); i != e; ++i) {
// Lower all @llvm.objectsize calls first because they may
// use a bitcast/GEP of the alloca we are removing.
  2533. if (!Users[i])
  2534. continue;
  2535. Instruction *I = cast<Instruction>(&*Users[i]);
  2536. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
  2537. if (II->getIntrinsicID() == Intrinsic::objectsize) {
  2538. Value *Result =
  2539. lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/true);
  2540. replaceInstUsesWith(*I, Result);
  2541. eraseInstFromFunction(*I);
  2542. Users[i] = nullptr; // Skip examining in the next loop.
  2543. }
  2544. }
  2545. }
  2546. for (unsigned i = 0, e = Users.size(); i != e; ++i) {
  2547. if (!Users[i])
  2548. continue;
  2549. Instruction *I = cast<Instruction>(&*Users[i]);
  2550. if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
  2551. replaceInstUsesWith(*C,
  2552. ConstantInt::get(Type::getInt1Ty(C->getContext()),
  2553. C->isFalseWhenEqual()));
  2554. } else if (auto *SI = dyn_cast<StoreInst>(I)) {
  2555. for (auto *DVI : DVIs)
  2556. if (DVI->isAddressOfVariable())
  2557. ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
  2558. } else {
// Casts, GEPs, or anything else: we're about to delete this instruction,
// so it cannot have any valid uses.
  2561. replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
  2562. }
  2563. eraseInstFromFunction(*I);
  2564. }
  2565. if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
  2566. // Replace invoke with a NOP intrinsic to maintain the original CFG
  2567. Module *M = II->getModule();
  2568. Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
  2569. InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
  2570. std::nullopt, "", II->getParent());
  2571. }
  2572. // Remove debug intrinsics which describe the value contained within the
  2573. // alloca. In addition to removing dbg.{declare,addr} which simply point to
  2574. // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
  2575. //
  2576. // ```
  2577. // define void @foo(i32 %0) {
  2578. // %a = alloca i32 ; Deleted.
  2579. // store i32 %0, i32* %a
  2580. // dbg.value(i32 %0, "arg0") ; Not deleted.
  2581. // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
  2582. // call void @trivially_inlinable_no_op(i32* %a)
  2583. // ret void
  2584. // }
  2585. // ```
  2586. //
  2587. // This may not be required if we stop describing the contents of allocas
  2588. // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
  2589. // the LowerDbgDeclare utility.
  2590. //
  2591. // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
  2592. // "arg0" dbg.value may be stale after the call. However, failing to remove
  2593. // the DW_OP_deref dbg.value causes large gaps in location coverage.
  2594. for (auto *DVI : DVIs)
  2595. if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
  2596. DVI->eraseFromParent();
  2597. return eraseInstFromFunction(MI);
  2598. }
  2599. return nullptr;
  2600. }
  2601. /// Move the call to free before a NULL test.
  2602. ///
/// Check whether this call to free is reached only after its argument has been
/// tested against NULL (property 0).
/// If so, it is legal to move this call into its predecessor block.
///
/// The move is performed only if the block containing the call to free
/// will be removed, i.e.:
/// 1. it has only one predecessor P, and P has two successors
/// 2. it contains the call, noops, and an unconditional branch
/// 3. its successor is the same as its predecessor's successor
///
/// Profitability is not a concern here; this function should be called only
/// if the caller knows this transformation would be profitable (e.g., for
/// code size).
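/// For illustration (hypothetical block and value names):
///   %c = icmp eq i8* %x, null
///   br i1 %c, label %end, label %free.block
/// free.block:
///   call void @free(i8* %x)
///   br label %end
/// Here the call to free is moved in front of the branch in the predecessor;
/// SimplifyCFG can then delete the now-empty block. This relies on free(null)
/// being a no-op.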
  2616. static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
  2617. const DataLayout &DL) {
  2618. Value *Op = FI.getArgOperand(0);
  2619. BasicBlock *FreeInstrBB = FI.getParent();
  2620. BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
  2621. // Validate part of constraint #1: Only one predecessor
// FIXME: We can extend the number of predecessors, but in that case, we
  2623. // would duplicate the call to free in each predecessor and it may
  2624. // not be profitable even for code size.
  2625. if (!PredBB)
  2626. return nullptr;
// Validate constraint #2: Does this block contain only the call to
// free, noops, and an unconditional branch?
  2629. BasicBlock *SuccBB;
  2630. Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
  2631. if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
  2632. return nullptr;
// If there are only 2 instructions in the block, at this point they must
// be the call to free and the unconditional branch.
// If there are more than 2 instructions, check that the extra ones are
// noops, i.e., they won't hurt the performance of the generated code.
  2637. if (FreeInstrBB->size() != 2) {
  2638. for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
  2639. if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
  2640. continue;
  2641. auto *Cast = dyn_cast<CastInst>(&Inst);
  2642. if (!Cast || !Cast->isNoopCast(DL))
  2643. return nullptr;
  2644. }
  2645. }
  2646. // Validate the rest of constraint #1 by matching on the pred branch.
  2647. Instruction *TI = PredBB->getTerminator();
  2648. BasicBlock *TrueBB, *FalseBB;
  2649. ICmpInst::Predicate Pred;
  2650. if (!match(TI, m_Br(m_ICmp(Pred,
  2651. m_CombineOr(m_Specific(Op),
  2652. m_Specific(Op->stripPointerCasts())),
  2653. m_Zero()),
  2654. TrueBB, FalseBB)))
  2655. return nullptr;
  2656. if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
  2657. return nullptr;
  2658. // Validate constraint #3: Ensure the null case just falls through.
  2659. if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
  2660. return nullptr;
  2661. assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
  2662. "Broken CFG: missing edge from predecessor to successor");
  2663. // At this point, we know that everything in FreeInstrBB can be moved
  2664. // before TI.
  2665. for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
  2666. if (&Instr == FreeInstrBBTerminator)
  2667. break;
  2668. Instr.moveBefore(TI);
  2669. }
  2670. assert(FreeInstrBB->size() == 1 &&
  2671. "Only the branch instruction should remain");
  2672. // Now that we've moved the call to free before the NULL check, we have to
  2673. // remove any attributes on its parameter that imply it's non-null, because
  2674. // those attributes might have only been valid because of the NULL check, and
  2675. // we can get miscompiles if we keep them. This is conservative if non-null is
  2676. // also implied by something other than the NULL check, but it's guaranteed to
  2677. // be correct, and the conservativeness won't matter in practice, since the
  2678. // attributes are irrelevant for the call to free itself and the pointer
  2679. // shouldn't be used after the call.
  2680. AttributeList Attrs = FI.getAttributes();
  2681. Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
  2682. Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
  2683. if (Dereferenceable.isValid()) {
  2684. uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
  2685. Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
  2686. Attribute::Dereferenceable);
  2687. Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
  2688. }
  2689. FI.setAttributes(Attrs);
  2690. return &FI;
  2691. }
  2692. Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
  2693. // free undef -> unreachable.
  2694. if (isa<UndefValue>(Op)) {
  2695. // Leave a marker since we can't modify the CFG here.
  2696. CreateNonTerminatorUnreachable(&FI);
  2697. return eraseInstFromFunction(FI);
  2698. }
  2699. // If we have 'free null' delete the instruction. This can happen in stl code
  2700. // when lots of inlining happens.
  2701. if (isa<ConstantPointerNull>(Op))
  2702. return eraseInstFromFunction(FI);
  2703. // If we had free(realloc(...)) with no intervening uses, then eliminate the
  2704. // realloc() entirely.
  2705. CallInst *CI = dyn_cast<CallInst>(Op);
  2706. if (CI && CI->hasOneUse())
  2707. if (Value *ReallocatedOp = getReallocatedOperand(CI))
  2708. return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
// If we optimize for code size, try to move the call to free before the null
// test so that SimplifyCFG can remove the empty block and dead code
// elimination can remove the branch. I.e., this helps to turn something like:
  2712. // if (foo) free(foo);
  2713. // into
  2714. // free(foo);
  2715. //
  2716. // Note that we can only do this for 'free' and not for any flavor of
  2717. // 'operator delete'; there is no 'operator delete' symbol for which we are
  2718. // permitted to invent a call, even if we're passing in a null pointer.
  2719. if (MinimizeSize) {
  2720. LibFunc Func;
  2721. if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
  2722. if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
  2723. return I;
  2724. }
  2725. return nullptr;
  2726. }
  2727. static bool isMustTailCall(Value *V) {
  2728. if (auto *CI = dyn_cast<CallInst>(V))
  2729. return CI->isMustTailCall();
  2730. return false;
  2731. }
  2732. Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
  2733. if (RI.getNumOperands() == 0) // ret void
  2734. return nullptr;
  2735. Value *ResultOp = RI.getOperand(0);
  2736. Type *VTy = ResultOp->getType();
  2737. if (!VTy->isIntegerTy() || isa<Constant>(ResultOp))
  2738. return nullptr;
  2739. // Don't replace result of musttail calls.
  2740. if (isMustTailCall(ResultOp))
  2741. return nullptr;
  2742. // There might be assume intrinsics dominating this return that completely
  2743. // determine the value. If so, constant fold it.
  2744. KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
  2745. if (Known.isConstant())
  2746. return replaceOperand(RI, 0,
  2747. Constant::getIntegerValue(VTy, Known.getConstant()));
  2748. return nullptr;
  2749. }
  2750. // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
  2751. Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
  2752. // Try to remove the previous instruction if it must lead to unreachable.
  2753. // This includes instructions like stores and "llvm.assume" that may not get
  2754. // removed by simple dead code elimination.
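// For illustration (hypothetical IR), in:
//   store i32 0, i32* @g
//   call void @llvm.assume(i1 %c)
//   unreachable
// both the store and the assume are erased, since each is guaranteed to
// transfer execution to its successor and that successor is unreachable.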
  2755. while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
  2756. // While we theoretically can erase EH, that would result in a block that
  2757. // used to start with an EH no longer starting with EH, which is invalid.
  2758. // To make it valid, we'd need to fixup predecessors to no longer refer to
  2759. // this block, but that changes CFG, which is not allowed in InstCombine.
if (Prev->isEHPad())
return nullptr; // Cannot drop any more instructions. We're done here.
if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
return nullptr; // Cannot drop any more instructions. We're done here.
  2764. // Otherwise, this instruction can be freely erased,
  2765. // even if it is not side-effect free.
  2766. // A value may still have uses before we process it here (for example, in
  2767. // another unreachable block), so convert those to poison.
  2768. replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
  2769. eraseInstFromFunction(*Prev);
  2770. }
  2771. assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty.");
  2772. // FIXME: recurse into unconditional predecessors?
  2773. return nullptr;
  2774. }
  2775. Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
  2776. assert(BI.isUnconditional() && "Only for unconditional branches.");
// If a store is the second-to-last instruction in the basic block
// (excluding debug info and bitcasts of pointers) and the block ends with
// an unconditional branch, try to move the store into the successor block.
  2780. auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
  2781. auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
  2782. return BBI->isDebugOrPseudoInst() ||
  2783. (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
  2784. };
  2785. BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
  2786. do {
  2787. if (BBI != FirstInstr)
  2788. --BBI;
  2789. } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
  2790. return dyn_cast<StoreInst>(BBI);
  2791. };
  2792. if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
  2793. if (mergeStoreIntoSuccessor(*SI))
  2794. return &BI;
  2795. return nullptr;
  2796. }
  2797. Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
  2798. if (BI.isUnconditional())
  2799. return visitUnconditionalBranchInst(BI);
  2800. // Change br (not X), label True, label False to: br X, label False, True
  2801. Value *Cond = BI.getCondition();
  2802. Value *X;
  2803. if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
  2804. // Swap Destinations and condition...
  2805. BI.swapSuccessors();
  2806. return replaceOperand(BI, 0, X);
  2807. }
  2808. // Canonicalize logical-and-with-invert as logical-or-with-invert.
  2809. // This is done by inverting the condition and swapping successors:
  2810. // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
  2811. Value *Y;
  2812. if (isa<SelectInst>(Cond) &&
  2813. match(Cond,
  2814. m_OneUse(m_LogicalAnd(m_Value(X), m_OneUse(m_Not(m_Value(Y))))))) {
  2815. Value *NotX = Builder.CreateNot(X, "not." + X->getName());
  2816. Value *Or = Builder.CreateLogicalOr(NotX, Y);
  2817. BI.swapSuccessors();
  2818. return replaceOperand(BI, 0, Or);
  2819. }
  2820. // If the condition is irrelevant, remove the use so that other
  2821. // transforms on the condition become more effective.
  2822. if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
  2823. return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
  2824. // Canonicalize, for example, fcmp_one -> fcmp_oeq.
  2825. CmpInst::Predicate Pred;
  2826. if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
  2827. !isCanonicalPredicate(Pred)) {
  2828. // Swap destinations and condition.
  2829. auto *Cmp = cast<CmpInst>(Cond);
  2830. Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
  2831. BI.swapSuccessors();
  2832. Worklist.push(Cmp);
  2833. return &BI;
  2834. }
  2835. return nullptr;
  2836. }
  2837. Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
  2838. Value *Cond = SI.getCondition();
  2839. Value *Op0;
  2840. ConstantInt *AddRHS;
  2841. if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
  2842. // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
  2843. for (auto Case : SI.cases()) {
  2844. Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
  2845. assert(isa<ConstantInt>(NewCase) &&
  2846. "Result of expression should be constant");
  2847. Case.setValue(cast<ConstantInt>(NewCase));
  2848. }
  2849. return replaceOperand(SI, 0, Op0);
  2850. }
  2851. KnownBits Known = computeKnownBits(Cond, 0, &SI);
  2852. unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
  2853. unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
  2854. // Compute the number of leading bits we can ignore.
  2855. // TODO: A better way to determine this would use ComputeNumSignBits().
  2856. for (const auto &C : SI.cases()) {
  2857. LeadingKnownZeros = std::min(
  2858. LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
  2859. LeadingKnownOnes = std::min(
  2860. LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
  2861. }
  2862. unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
// Shrink the condition operand if the new type is smaller than the old type.
// But do not shrink to a non-standard type, because the backend can't
// generate good code for that yet.
// TODO: We can make it aggressive again after fixing PR39569.
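// For illustration (hypothetical widths): if %cond is an i64 whose upper 32
// bits are known zero and every case constant fits in 32 bits, the switch is
// rewritten to use 'trunc i64 %cond to i32' with correspondingly truncated
// case values.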
  2867. if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
  2868. shouldChangeType(Known.getBitWidth(), NewWidth)) {
  2869. IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
  2870. Builder.SetInsertPoint(&SI);
  2871. Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
  2872. for (auto Case : SI.cases()) {
  2873. APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
  2874. Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
  2875. }
  2876. return replaceOperand(SI, 0, NewCond);
  2877. }
  2878. return nullptr;
  2879. }
  2880. Instruction *
  2881. InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
  2882. auto *WO = dyn_cast<WithOverflowInst>(EV.getAggregateOperand());
  2883. if (!WO)
  2884. return nullptr;
  2885. Intrinsic::ID OvID = WO->getIntrinsicID();
  2886. const APInt *C = nullptr;
  2887. if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
  2888. if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
  2889. OvID == Intrinsic::umul_with_overflow)) {
  2890. // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
  2891. if (C->isAllOnes())
  2892. return BinaryOperator::CreateNeg(WO->getLHS());
  2893. // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
  2894. if (C->isPowerOf2()) {
  2895. return BinaryOperator::CreateShl(
  2896. WO->getLHS(),
  2897. ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
  2898. }
  2899. }
  2900. }
  2901. // We're extracting from an overflow intrinsic. See if we're the only user.
  2902. // That allows us to simplify multiple result intrinsics to simpler things
  2903. // that just get one value.
  2904. if (!WO->hasOneUse())
  2905. return nullptr;
  2906. // Check if we're grabbing only the result of a 'with overflow' intrinsic
  2907. // and replace it with a traditional binary instruction.
  2908. if (*EV.idx_begin() == 0) {
  2909. Instruction::BinaryOps BinOp = WO->getBinaryOp();
  2910. Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
  2911. // Replace the old instruction's uses with poison.
  2912. replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
  2913. eraseInstFromFunction(*WO);
  2914. return BinaryOperator::Create(BinOp, LHS, RHS);
  2915. }
  2916. assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
  2917. // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
  2918. if (OvID == Intrinsic::usub_with_overflow)
  2919. return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
  2920. // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
  2921. // +1 is not possible because we assume signed values.
  2922. if (OvID == Intrinsic::smul_with_overflow &&
  2923. WO->getLHS()->getType()->isIntOrIntVectorTy(1))
  2924. return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
  2925. // If only the overflow result is used, and the right hand side is a
  2926. // constant (or constant splat), we can remove the intrinsic by directly
  2927. // checking for overflow.
  2928. if (C) {
  2929. // Compute the no-wrap range for LHS given RHS=C, then construct an
  2930. // equivalent icmp, potentially using an offset.
  2931. ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
  2932. WO->getBinaryOp(), *C, WO->getNoWrapKind());
  2933. CmpInst::Predicate Pred;
  2934. APInt NewRHSC, Offset;
  2935. NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
  2936. auto *OpTy = WO->getRHS()->getType();
  2937. auto *NewLHS = WO->getLHS();
  2938. if (Offset != 0)
  2939. NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
  2940. return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
  2941. ConstantInt::get(OpTy, NewRHSC));
  2942. }
  2943. return nullptr;
  2944. }
  2945. Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
  2946. Value *Agg = EV.getAggregateOperand();
  2947. if (!EV.hasIndices())
  2948. return replaceInstUsesWith(EV, Agg);
  2949. if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
  2950. SQ.getWithInstruction(&EV)))
  2951. return replaceInstUsesWith(EV, V);
  2952. if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
  2953. // We're extracting from an insertvalue instruction, compare the indices
  2954. const unsigned *exti, *exte, *insi, *inse;
  2955. for (exti = EV.idx_begin(), insi = IV->idx_begin(),
  2956. exte = EV.idx_end(), inse = IV->idx_end();
  2957. exti != exte && insi != inse;
  2958. ++exti, ++insi) {
  2959. if (*insi != *exti)
  2960. // The insert and extract both reference distinctly different elements.
  2961. // This means the extract is not influenced by the insert, and we can
  2962. // replace the aggregate operand of the extract with the aggregate
  2963. // operand of the insert. i.e., replace
  2964. // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
  2965. // %E = extractvalue { i32, { i32 } } %I, 0
  2966. // with
  2967. // %E = extractvalue { i32, { i32 } } %A, 0
  2968. return ExtractValueInst::Create(IV->getAggregateOperand(),
  2969. EV.getIndices());
  2970. }
  2971. if (exti == exte && insi == inse)
  2972. // Both iterators are at the end: Index lists are identical. Replace
  2973. // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
  2974. // %C = extractvalue { i32, { i32 } } %B, 1, 0
  2975. // with "i32 42"
  2976. return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
  2977. if (exti == exte) {
  2978. // The extract list is a prefix of the insert list. i.e. replace
  2979. // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
  2980. // %E = extractvalue { i32, { i32 } } %I, 1
  2981. // with
  2982. // %X = extractvalue { i32, { i32 } } %A, 1
  2983. // %E = insertvalue { i32 } %X, i32 42, 0
  2984. // by switching the order of the insert and extract (though the
  2985. // insertvalue should be left in, since it may have other uses).
  2986. Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
  2987. EV.getIndices());
  2988. return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
  2989. ArrayRef(insi, inse));
  2990. }
  2991. if (insi == inse)
  2992. // The insert list is a prefix of the extract list
  2993. // We can simply remove the common indices from the extract and make it
  2994. // operate on the inserted value instead of the insertvalue result.
  2995. // i.e., replace
  2996. // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
  2997. // %E = extractvalue { i32, { i32 } } %I, 1, 0
  2998. // with
  2999. // %E extractvalue { i32 } { i32 42 }, 0
  3000. return ExtractValueInst::Create(IV->getInsertedValueOperand(),
  3001. ArrayRef(exti, exte));
  3002. }
  3003. if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
  3004. return R;
  3005. if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
  3006. // If the (non-volatile) load only has one use, we can rewrite this to a
  3007. // load from a GEP. This reduces the size of the load. If a load is used
  3008. // only by extractvalue instructions then this either must have been
  3009. // optimized before, or it is a struct with padding, in which case we
  3010. // don't want to do the transformation as it loses padding knowledge.
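// For illustration (hypothetical types and names):
//   %L = load { i32, i64 }, { i32, i64 }* %p
//   %E = extractvalue { i32, i64 } %L, 1
// becomes
//   %gep = getelementptr inbounds { i32, i64 }, { i32, i64 }* %p, i32 0, i32 1
//   %E = load i64, i64* %gep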
  3011. if (L->isSimple() && L->hasOneUse()) {
  3012. // extractvalue has integer indices, getelementptr has Value*s. Convert.
  3013. SmallVector<Value*, 4> Indices;
  3014. // Prefix an i32 0 since we need the first element.
  3015. Indices.push_back(Builder.getInt32(0));
  3016. for (unsigned Idx : EV.indices())
  3017. Indices.push_back(Builder.getInt32(Idx));
  3018. // We need to insert these at the location of the old load, not at that of
  3019. // the extractvalue.
  3020. Builder.SetInsertPoint(L);
  3021. Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
  3022. L->getPointerOperand(), Indices);
  3023. Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
// Whatever aliasing information we had for the original load must also
// hold for the smaller load, so propagate the annotations.
  3026. NL->setAAMetadata(L->getAAMetadata());
  3027. // Returning the load directly will cause the main loop to insert it in
  3028. // the wrong spot, so use replaceInstUsesWith().
  3029. return replaceInstUsesWith(EV, NL);
  3030. }
  3031. }
  3032. if (auto *PN = dyn_cast<PHINode>(Agg))
  3033. if (Instruction *Res = foldOpIntoPhi(EV, PN))
  3034. return Res;
  3035. // We could simplify extracts from other values. Note that nested extracts may
  3036. // already be simplified implicitly by the above: extract (extract (insert) )
  3037. // will be translated into extract ( insert ( extract ) ) first and then just
  3038. // the value inserted, if appropriate. Similarly for extracts from single-use
  3039. // loads: extract (extract (load)) will be translated to extract (load (gep))
  3040. // and if again single-use then via load (gep (gep)) to load (gep).
  3041. // However, double extracts from e.g. function arguments or return values
  3042. // aren't handled yet.
  3043. return nullptr;
  3044. }
  3045. /// Return 'true' if the given typeinfo will match anything.
  3046. static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
  3047. switch (Personality) {
  3048. case EHPersonality::GNU_C:
  3049. case EHPersonality::GNU_C_SjLj:
  3050. case EHPersonality::Rust:
// The GCC C EH and Rust personalities only exist to support cleanups, so
// it's not clear what the semantics of catch clauses are.
  3053. return false;
  3054. case EHPersonality::Unknown:
  3055. return false;
  3056. case EHPersonality::GNU_Ada:
  3057. // While __gnat_all_others_value will match any Ada exception, it doesn't
  3058. // match foreign exceptions (or didn't, before gcc-4.7).
  3059. return false;
  3060. case EHPersonality::GNU_CXX:
  3061. case EHPersonality::GNU_CXX_SjLj:
  3062. case EHPersonality::GNU_ObjC:
  3063. case EHPersonality::MSVC_X86SEH:
  3064. case EHPersonality::MSVC_TableSEH:
  3065. case EHPersonality::MSVC_CXX:
  3066. case EHPersonality::CoreCLR:
  3067. case EHPersonality::Wasm_CXX:
  3068. case EHPersonality::XL_CXX:
  3069. return TypeInfo->isNullValue();
  3070. }
  3071. llvm_unreachable("invalid enum");
  3072. }
static bool shorter_filter(const Value *LHS, const Value *RHS) {
return cast<ArrayType>(LHS->getType())->getNumElements() <
       cast<ArrayType>(RHS->getType())->getNumElements();
}
  3079. Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
  3080. // The logic here should be correct for any real-world personality function.
  3081. // However if that turns out not to be true, the offending logic can always
  3082. // be conditioned on the personality function, like the catch-all logic is.
  3083. EHPersonality Personality =
  3084. classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
// Simplify the list of clauses, e.g. by removing repeated catch clauses
// (these are often created by inlining).
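// For illustration (with a hypothetical typeinfo), a landingpad such as
//   landingpad { i8*, i32 } catch i8* @_ZTIi catch i8* @_ZTIi
// is rebuilt with a single 'catch i8* @_ZTIi' clause.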
  3087. bool MakeNewInstruction = false; // If true, recreate using the following:
  3088. SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
  3089. bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
  3090. SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
  3091. for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
  3092. bool isLastClause = i + 1 == e;
  3093. if (LI.isCatch(i)) {
  3094. // A catch clause.
  3095. Constant *CatchClause = LI.getClause(i);
  3096. Constant *TypeInfo = CatchClause->stripPointerCasts();
  3097. // If we already saw this clause, there is no point in having a second
  3098. // copy of it.
  3099. if (AlreadyCaught.insert(TypeInfo).second) {
  3100. // This catch clause was not already seen.
  3101. NewClauses.push_back(CatchClause);
  3102. } else {
  3103. // Repeated catch clause - drop the redundant copy.
  3104. MakeNewInstruction = true;
  3105. }
  3106. // If this is a catch-all then there is no point in keeping any following
  3107. // clauses or marking the landingpad as having a cleanup.
  3108. if (isCatchAll(Personality, TypeInfo)) {
  3109. if (!isLastClause)
  3110. MakeNewInstruction = true;
  3111. CleanupFlag = false;
  3112. break;
  3113. }
  3114. } else {
  3115. // A filter clause. If any of the filter elements were already caught
  3116. // then they can be dropped from the filter. It is tempting to try to
  3117. // exploit the filter further by saying that any typeinfo that does not
  3118. // occur in the filter can't be caught later (and thus can be dropped).
  3119. // However this would be wrong, since typeinfos can match without being
  3120. // equal (for example if one represents a C++ class, and the other some
  3121. // class derived from it).
  3122. assert(LI.isFilter(i) && "Unsupported landingpad clause!");
  3123. Constant *FilterClause = LI.getClause(i);
  3124. ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
  3125. unsigned NumTypeInfos = FilterType->getNumElements();
  3126. // An empty filter catches everything, so there is no point in keeping any
  3127. // following clauses or marking the landingpad as having a cleanup. By
  3128. // dealing with this case here the following code is made a bit simpler.
  3129. if (!NumTypeInfos) {
  3130. NewClauses.push_back(FilterClause);
  3131. if (!isLastClause)
  3132. MakeNewInstruction = true;
  3133. CleanupFlag = false;
  3134. break;
  3135. }
  3136. bool MakeNewFilter = false; // If true, make a new filter.
  3137. SmallVector<Constant *, 16> NewFilterElts; // New elements.
  3138. if (isa<ConstantAggregateZero>(FilterClause)) {
  3139. // Not an empty filter - it contains at least one null typeinfo.
  3140. assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
  3141. Constant *TypeInfo =
  3142. Constant::getNullValue(FilterType->getElementType());
  3143. // If this typeinfo is a catch-all then the filter can never match.
  3144. if (isCatchAll(Personality, TypeInfo)) {
  3145. // Throw the filter away.
  3146. MakeNewInstruction = true;
  3147. continue;
  3148. }
  3149. // There is no point in having multiple copies of this typeinfo, so
  3150. // discard all but the first copy if there is more than one.
  3151. NewFilterElts.push_back(TypeInfo);
  3152. if (NumTypeInfos > 1)
  3153. MakeNewFilter = true;
  3154. } else {
  3155. ConstantArray *Filter = cast<ConstantArray>(FilterClause);
  3156. SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
  3157. NewFilterElts.reserve(NumTypeInfos);
  3158. // Remove any filter elements that were already caught or that already
  3159. // occurred in the filter. While there, see if any of the elements are
  3160. // catch-alls. If so, the filter can be discarded.
  3161. bool SawCatchAll = false;
  3162. for (unsigned j = 0; j != NumTypeInfos; ++j) {
  3163. Constant *Elt = Filter->getOperand(j);
  3164. Constant *TypeInfo = Elt->stripPointerCasts();
  3165. if (isCatchAll(Personality, TypeInfo)) {
  3166. // This element is a catch-all. Bail out, noting this fact.
  3167. SawCatchAll = true;
  3168. break;
  3169. }
  3170. // Even if we've seen a type in a catch clause, we don't want to
  3171. // remove it from the filter. An unexpected type handler may be
  3172. // set up for a call site which throws an exception of the same
  3173. // type caught. In order for the exception thrown by the unexpected
  3174. // handler to propagate correctly, the filter must be correctly
  3175. // described for the call site.
  3176. //
  3177. // Example:
  3178. //
  3179. // void unexpected() { throw 1;}
  3180. // void foo() throw (int) {
  3181. // std::set_unexpected(unexpected);
  3182. // try {
  3183. // throw 2.0;
  3184. // } catch (int i) {}
  3185. // }
  3186. // There is no point in having multiple copies of the same typeinfo in
  3187. // a filter, so only add it if we didn't already.
  3188. if (SeenInFilter.insert(TypeInfo).second)
  3189. NewFilterElts.push_back(cast<Constant>(Elt));
  3190. }
  3191. // A filter containing a catch-all cannot match anything by definition.
  3192. if (SawCatchAll) {
  3193. // Throw the filter away.
  3194. MakeNewInstruction = true;
  3195. continue;
  3196. }
  3197. // If we dropped something from the filter, make a new one.
  3198. if (NewFilterElts.size() < NumTypeInfos)
  3199. MakeNewFilter = true;
  3200. }
  3201. if (MakeNewFilter) {
  3202. FilterType = ArrayType::get(FilterType->getElementType(),
  3203. NewFilterElts.size());
  3204. FilterClause = ConstantArray::get(FilterType, NewFilterElts);
  3205. MakeNewInstruction = true;
  3206. }
  3207. NewClauses.push_back(FilterClause);
  3208. // If the new filter is empty then it will catch everything so there is
  3209. // no point in keeping any following clauses or marking the landingpad
  3210. // as having a cleanup. The case of the original filter being empty was
  3211. // already handled above.
  3212. if (MakeNewFilter && !NewFilterElts.size()) {
  3213. assert(MakeNewInstruction && "New filter but not a new instruction!");
  3214. CleanupFlag = false;
  3215. break;
  3216. }
  3217. }
  3218. }
  3219. // If several filters occur in a row then reorder them so that the shortest
  3220. // filters come first (those with the smallest number of elements). This is
  3221. // advantageous because shorter filters are more likely to match, speeding up
  3222. // unwinding, but mostly because it increases the effectiveness of the other
  3223. // filter optimizations below.
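// Illustrative example (not from the original source): given the clause list
//   [catch T, filter [A, B, C], filter [D]]
// the two adjacent filters are reordered by length to give
//   [catch T, filter [D], filter [A, B, C]]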
  3224. for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
  3225. unsigned j;
  3226. // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
  3227. for (j = i; j != e; ++j)
  3228. if (!isa<ArrayType>(NewClauses[j]->getType()))
  3229. break;
  3230. // Check whether the filters are already sorted by length. We need to know
  3231. // if sorting them is actually going to do anything so that we only make a
  3232. // new landingpad instruction if it does.
  3233. for (unsigned k = i; k + 1 < j; ++k)
  3234. if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
  3235. // Not sorted, so sort the filters now. Doing an unstable sort would be
  3236. // correct too but reordering filters pointlessly might confuse users.
  3237. std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
  3238. shorter_filter);
  3239. MakeNewInstruction = true;
  3240. break;
  3241. }
  3242. // Look for the next batch of filters.
  3243. i = j + 1;
  3244. }
  3245. // If typeinfos matched if and only if equal, then the elements of a filter L
  3246. // that occurs later than a filter F could be replaced by the intersection of
  3247. // the elements of F and L. In reality two typeinfos can match without being
  3248. // equal (for example if one represents a C++ class, and the other some class
  3249. // derived from it) so it would be wrong to perform this transform in general.
  3250. // However the transform is correct and useful if F is a subset of L. In that
  3251. // case L can be replaced by F, and thus removed altogether since repeating a
  3252. // filter is pointless. So here we look at all pairs of filters F and L where
  3253. // L follows F in the list of clauses, and remove L if every element of F is
  3254. // an element of L. This can occur when inlining C++ functions with exception
  3255. // specifications.
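// Illustrative example (not from the original source): given
//   [filter [A], filter [A, B]]
// every element of the first filter also occurs in the second, so the second
// filter is redundant and is removed, leaving just [filter [A]].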
  3256. for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
  3257. // Examine each filter in turn.
  3258. Value *Filter = NewClauses[i];
  3259. ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
  3260. if (!FTy)
  3261. // Not a filter - skip it.
  3262. continue;
  3263. unsigned FElts = FTy->getNumElements();
  3264. // Examine each filter following this one. Doing this backwards means that
  3265. // we don't have to worry about filters disappearing under us when removed.
  3266. for (unsigned j = NewClauses.size() - 1; j != i; --j) {
  3267. Value *LFilter = NewClauses[j];
  3268. ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
  3269. if (!LTy)
  3270. // Not a filter - skip it.
  3271. continue;
  3272. // If Filter is a subset of LFilter, i.e. every element of Filter is also
  3273. // an element of LFilter, then discard LFilter.
  3274. SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
  3275. // If Filter is empty then it is a subset of LFilter.
  3276. if (!FElts) {
  3277. // Discard LFilter.
  3278. NewClauses.erase(J);
  3279. MakeNewInstruction = true;
  3280. // Move on to the next filter.
  3281. continue;
  3282. }
  3283. unsigned LElts = LTy->getNumElements();
  3284. // If Filter is longer than LFilter then it cannot be a subset of it.
  3285. if (FElts > LElts)
  3286. // Move on to the next filter.
  3287. continue;
  3288. // At this point we know that LFilter has at least one element.
  3289. if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
  3290. // Filter is a subset of LFilter iff Filter contains only zeros (as we
  3291. // already know that Filter is not longer than LFilter).
  3292. if (isa<ConstantAggregateZero>(Filter)) {
  3293. assert(FElts <= LElts && "Should have handled this case earlier!");
  3294. // Discard LFilter.
  3295. NewClauses.erase(J);
  3296. MakeNewInstruction = true;
  3297. }
  3298. // Move on to the next filter.
  3299. continue;
  3300. }
  3301. ConstantArray *LArray = cast<ConstantArray>(LFilter);
  3302. if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
  3303. // Since Filter is non-empty and contains only zeros, it is a subset of
  3304. // LFilter iff LFilter contains a zero.
  3305. assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
  3306. for (unsigned l = 0; l != LElts; ++l)
  3307. if (LArray->getOperand(l)->isNullValue()) {
  3308. // LFilter contains a zero - discard it.
  3309. NewClauses.erase(J);
  3310. MakeNewInstruction = true;
  3311. break;
  3312. }
  3313. // Move on to the next filter.
  3314. continue;
  3315. }
  3316. // At this point we know that both filters are ConstantArrays. Loop over
  3317. // operands to see whether every element of Filter is also an element of
  3318. // LFilter. Since filters tend to be short this is probably faster than
  3319. // using a method that scales nicely.
  3320. ConstantArray *FArray = cast<ConstantArray>(Filter);
  3321. bool AllFound = true;
  3322. for (unsigned f = 0; f != FElts; ++f) {
  3323. Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
  3324. AllFound = false;
  3325. for (unsigned l = 0; l != LElts; ++l) {
  3326. Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
  3327. if (LTypeInfo == FTypeInfo) {
  3328. AllFound = true;
  3329. break;
  3330. }
  3331. }
  3332. if (!AllFound)
  3333. break;
  3334. }
  3335. if (AllFound) {
  3336. // Discard LFilter.
  3337. NewClauses.erase(J);
  3338. MakeNewInstruction = true;
  3339. }
  3340. // Move on to the next filter.
  3341. }
  3342. }
  3343. // If we changed any of the clauses, replace the old landingpad instruction
  3344. // with a new one.
  3345. if (MakeNewInstruction) {
  3346. LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
  3347. NewClauses.size());
  3348. for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
  3349. NLI->addClause(NewClauses[i]);
  3350. // A landing pad with no clauses must have the cleanup flag set. It is
  3351. // theoretically possible, though highly unlikely, that we eliminated all
  3352. // clauses. If so, force the cleanup flag to true.
  3353. if (NewClauses.empty())
  3354. CleanupFlag = true;
  3355. NLI->setCleanup(CleanupFlag);
  3356. return NLI;
  3357. }
  3358. // Even if none of the clauses changed, we may nonetheless have understood
  3359. // that the cleanup flag is pointless. Clear it if so.
  3360. if (LI.isCleanup() != CleanupFlag) {
  3361. assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
  3362. LI.setCleanup(CleanupFlag);
  3363. return &LI;
  3364. }
  3365. return nullptr;
  3366. }
  3367. Value *
  3368. InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
  3369. // Try to push freeze through instructions that propagate but don't produce
// poison as far as possible. If an operand of the freeze satisfies three
// conditions: 1) it has one use, 2) it does not produce poison, and 3) all
// but one of its operands are guaranteed non-poison, then push the freeze
// through to the one operand that is not guaranteed non-poison. The actual
// transform is as follows.
// Op1 = ... ; Op1 can be poison
// Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only one
// ; operand that is not guaranteed non-poison (Op1)
  3378. // ... = Freeze(Op0)
  3379. // =>
  3380. // Op1 = ...
  3381. // Op1.fr = Freeze(Op1)
  3382. // ... = Inst(Op1.fr, NonPoisonOps...)
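// A concrete IR sketch of the above (illustrative, not from the original
// source), where the constant operand is known to be non-poison:
//   %op = add nsw i32 %a, 1
//   %f  = freeze i32 %op
// =>
//   %a.fr = freeze i32 %a
//   %op   = add i32 %a.fr, 1   ; nsw dropped, uses of %f now use %op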
  3383. auto *OrigOp = OrigFI.getOperand(0);
  3384. auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
// While we could change the other users of OrigOp to use freeze(OrigOp), that
// could reduce their optimization opportunities, so only do this if OrigOp is
// used solely by the freeze.
  3388. if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
  3389. return nullptr;
  3390. // We can't push the freeze through an instruction which can itself create
  3391. // poison. If the only source of new poison is flags, we can simply
  3392. // strip them (since we know the only use is the freeze and nothing can
// benefit from them).
  3394. if (canCreateUndefOrPoison(cast<Operator>(OrigOp),
  3395. /*ConsiderFlagsAndMetadata*/ false))
  3396. return nullptr;
// If an operand is guaranteed not to be poison, there is no need to add a
// freeze to it. So we first find the operand that is not guaranteed to be
// non-poison.
  3400. Use *MaybePoisonOperand = nullptr;
  3401. for (Use &U : OrigOpInst->operands()) {
  3402. if (isa<MetadataAsValue>(U.get()) ||
  3403. isGuaranteedNotToBeUndefOrPoison(U.get()))
  3404. continue;
  3405. if (!MaybePoisonOperand)
  3406. MaybePoisonOperand = &U;
  3407. else
  3408. return nullptr;
  3409. }
  3410. OrigOpInst->dropPoisonGeneratingFlagsAndMetadata();
  3411. // If all operands are guaranteed to be non-poison, we can drop freeze.
  3412. if (!MaybePoisonOperand)
  3413. return OrigOp;
  3414. Builder.SetInsertPoint(OrigOpInst);
  3415. auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
  3416. MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
  3417. replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
  3418. return OrigOp;
  3419. }
  3420. Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
  3421. PHINode *PN) {
  3422. // Detect whether this is a recurrence with a start value and some number of
  3423. // backedge values. We'll check whether we can push the freeze through the
  3424. // backedge values (possibly dropping poison flags along the way) until we
  3425. // reach the phi again. In that case, we can move the freeze to the start
  3426. // value.
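// Illustrative IR sketch (not from the original source):
//   loop:
//     %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
//     %iv.fr = freeze i32 %iv
//     %iv.next = add nuw i32 %iv, 1
// =>
//   entry:
//     %start.fr = freeze i32 %start
//   loop:
//     %iv = phi i32 [ %start.fr, %entry ], [ %iv.next, %loop ]
//     %iv.next = add i32 %iv, 1   ; nuw dropped, uses of %iv.fr now use %iv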
  3427. Use *StartU = nullptr;
  3428. SmallVector<Value *> Worklist;
  3429. for (Use &U : PN->incoming_values()) {
  3430. if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
  3431. // Add backedge value to worklist.
  3432. Worklist.push_back(U.get());
  3433. continue;
  3434. }
  3435. // Don't bother handling multiple start values.
  3436. if (StartU)
  3437. return nullptr;
  3438. StartU = &U;
  3439. }
  3440. if (!StartU || Worklist.empty())
  3441. return nullptr; // Not a recurrence.
  3442. Value *StartV = StartU->get();
  3443. BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
  3444. bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
// We can't insert a freeze if the start value is the result of the
  3446. // terminator (e.g. an invoke).
  3447. if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
  3448. return nullptr;
  3449. SmallPtrSet<Value *, 32> Visited;
  3450. SmallVector<Instruction *> DropFlags;
  3451. while (!Worklist.empty()) {
  3452. Value *V = Worklist.pop_back_val();
  3453. if (!Visited.insert(V).second)
  3454. continue;
  3455. if (Visited.size() > 32)
  3456. return nullptr; // Limit the total number of values we inspect.
  3457. // Assume that PN is non-poison, because it will be after the transform.
  3458. if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
  3459. continue;
  3460. Instruction *I = dyn_cast<Instruction>(V);
  3461. if (!I || canCreateUndefOrPoison(cast<Operator>(I),
  3462. /*ConsiderFlagsAndMetadata*/ false))
  3463. return nullptr;
  3464. DropFlags.push_back(I);
  3465. append_range(Worklist, I->operands());
  3466. }
  3467. for (Instruction *I : DropFlags)
  3468. I->dropPoisonGeneratingFlagsAndMetadata();
  3469. if (StartNeedsFreeze) {
  3470. Builder.SetInsertPoint(StartBB->getTerminator());
  3471. Value *FrozenStartV = Builder.CreateFreeze(StartV,
  3472. StartV->getName() + ".fr");
  3473. replaceUse(*StartU, FrozenStartV);
  3474. }
  3475. return replaceInstUsesWith(FI, PN);
  3476. }
  3477. bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
  3478. Value *Op = FI.getOperand(0);
  3479. if (isa<Constant>(Op) || Op->hasOneUse())
  3480. return false;
  3481. // Move the freeze directly after the definition of its operand, so that
  3482. // it dominates the maximum number of uses. Note that it may not dominate
  3483. // *all* uses if the operand is an invoke/callbr and the use is in a phi on
  3484. // the normal/default destination. This is why the domination check in the
  3485. // replacement below is still necessary.
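// Illustrative sketch (not from the original source):
//   %x = ...
//   call void @use1(i32 %x)
//   %x.fr = freeze i32 %x
//   call void @use2(i32 %x.fr)
// =>
//   %x = ...
//   %x.fr = freeze i32 %x            ; moved right after the definition of %x
//   call void @use1(i32 %x.fr)       ; dominated use now sees the frozen value
//   call void @use2(i32 %x.fr)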
  3486. Instruction *MoveBefore;
  3487. if (isa<Argument>(Op)) {
  3488. MoveBefore =
  3489. &*FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
  3490. } else {
  3491. MoveBefore = cast<Instruction>(Op)->getInsertionPointAfterDef();
  3492. if (!MoveBefore)
  3493. return false;
  3494. }
  3495. bool Changed = false;
  3496. if (&FI != MoveBefore) {
  3497. FI.moveBefore(MoveBefore);
  3498. Changed = true;
  3499. }
  3500. Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
  3501. bool Dominates = DT.dominates(&FI, U);
  3502. Changed |= Dominates;
  3503. return Dominates;
  3504. });
  3505. return Changed;
  3506. }
  3507. Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
  3508. Value *Op0 = I.getOperand(0);
  3509. if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
  3510. return replaceInstUsesWith(I, V);
  3511. // freeze (phi const, x) --> phi const, (freeze x)
  3512. if (auto *PN = dyn_cast<PHINode>(Op0)) {
  3513. if (Instruction *NV = foldOpIntoPhi(I, PN))
  3514. return NV;
  3515. if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
  3516. return NV;
  3517. }
  3518. if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
  3519. return replaceInstUsesWith(I, NI);
  3520. // If I is freeze(undef), check its uses and fold it to a fixed constant.
  3521. // - or: pick -1
  3522. // - select's condition: if the true value is constant, choose it by making
  3523. // the condition true.
  3524. // - default: pick 0
  3525. //
  3526. // Note that this transform is intentionally done here rather than
  3527. // via an analysis in InstSimplify or at individual user sites. That is
  3528. // because we must produce the same value for all uses of the freeze -
  3529. // it's the reason "freeze" exists!
  3530. //
  3531. // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
  3532. // duplicating logic for binops at least.
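// For example (illustrative): if freeze(undef) is used only as an operand of
// 'or' instructions, it is replaced by -1; if its uses disagree on the best
// constant, 0 is used for all of them.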
  3533. auto getUndefReplacement = [&I](Type *Ty) {
  3534. Constant *BestValue = nullptr;
  3535. Constant *NullValue = Constant::getNullValue(Ty);
  3536. for (const auto *U : I.users()) {
  3537. Constant *C = NullValue;
  3538. if (match(U, m_Or(m_Value(), m_Value())))
  3539. C = ConstantInt::getAllOnesValue(Ty);
  3540. else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
  3541. C = ConstantInt::getTrue(Ty);
  3542. if (!BestValue)
  3543. BestValue = C;
  3544. else if (BestValue != C)
  3545. BestValue = NullValue;
  3546. }
  3547. assert(BestValue && "Must have at least one use");
  3548. return BestValue;
  3549. };
  3550. if (match(Op0, m_Undef()))
  3551. return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
  3552. Constant *C;
  3553. if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
  3554. Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
  3555. return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
  3556. }
  3557. // Replace uses of Op with freeze(Op).
  3558. if (freezeOtherUses(I))
  3559. return &I;
  3560. return nullptr;
  3561. }
  3562. /// Check for case where the call writes to an otherwise dead alloca. This
  3563. /// shows up for unused out-params in idiomatic C/C++ code. Note that this
/// helper *only* analyzes the write; it doesn't check any other legality aspect.
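/// For example (illustrative): in `int tmp; get_status(&tmp);` where `tmp` is
/// never read afterwards, the call's only write goes to a dead alloca.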
  3565. static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
  3566. auto *CB = dyn_cast<CallBase>(I);
  3567. if (!CB)
  3568. // TODO: handle e.g. store to alloca here - only worth doing if we extend
  3569. // to allow reload along used path as described below. Otherwise, this
  3570. // is simply a store to a dead allocation which will be removed.
  3571. return false;
  3572. std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
  3573. if (!Dest)
  3574. return false;
  3575. auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
  3576. if (!AI)
  3577. // TODO: allow malloc?
  3578. return false;
  3579. // TODO: allow memory access dominated by move point? Note that since AI
  3580. // could have a reference to itself captured by the call, we would need to
  3581. // account for cycles in doing so.
  3582. SmallVector<const User *> AllocaUsers;
  3583. SmallPtrSet<const User *, 4> Visited;
  3584. auto pushUsers = [&](const Instruction &I) {
  3585. for (const User *U : I.users()) {
  3586. if (Visited.insert(U).second)
  3587. AllocaUsers.push_back(U);
  3588. }
  3589. };
  3590. pushUsers(*AI);
  3591. while (!AllocaUsers.empty()) {
  3592. auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
  3593. if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
  3594. isa<AddrSpaceCastInst>(UserI)) {
  3595. pushUsers(*UserI);
  3596. continue;
  3597. }
  3598. if (UserI == CB)
  3599. continue;
  3600. // TODO: support lifetime.start/end here
  3601. return false;
  3602. }
  3603. return true;
  3604. }
  3605. /// Try to move the specified instruction from its current block into the
  3606. /// beginning of DestBlock, which can only happen if it's safe to move the
  3607. /// instruction past all of the instructions between it and the end of its
  3608. /// block.
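/// For example (illustrative), a load is only sunk when the destination block's
/// unique predecessor is the load's own block and no instruction after the load
/// in that block may write to memory.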
  3609. static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
  3610. TargetLibraryInfo &TLI) {
  3611. BasicBlock *SrcBlock = I->getParent();
// Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
  3613. if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
  3614. I->isTerminator())
  3615. return false;
  3616. // Do not sink static or dynamic alloca instructions. Static allocas must
  3617. // remain in the entry block, and dynamic allocas must not be sunk in between
  3618. // a stacksave / stackrestore pair, which would incorrectly shorten its
  3619. // lifetime.
  3620. if (isa<AllocaInst>(I))
  3621. return false;
  3622. // Do not sink into catchswitch blocks.
  3623. if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
  3624. return false;
  3625. // Do not sink convergent call instructions.
  3626. if (auto *CI = dyn_cast<CallInst>(I)) {
  3627. if (CI->isConvergent())
  3628. return false;
  3629. }
// Unless we can prove that the memory write isn't visible except on the
  3631. // path we're sinking to, we must bail.
  3632. if (I->mayWriteToMemory()) {
  3633. if (!SoleWriteToDeadLocal(I, TLI))
  3634. return false;
  3635. }
  3636. // We can only sink load instructions if there is nothing between the load and
  3637. // the end of block that could change the value.
  3638. if (I->mayReadFromMemory()) {
  3639. // We don't want to do any sophisticated alias analysis, so we only check
  3640. // the instructions after I in I's parent block if we try to sink to its
  3641. // successor block.
  3642. if (DestBlock->getUniquePredecessor() != I->getParent())
  3643. return false;
  3644. for (BasicBlock::iterator Scan = std::next(I->getIterator()),
  3645. E = I->getParent()->end();
  3646. Scan != E; ++Scan)
  3647. if (Scan->mayWriteToMemory())
  3648. return false;
  3649. }
  3650. I->dropDroppableUses([DestBlock](const Use *U) {
  3651. if (auto *I = dyn_cast<Instruction>(U->getUser()))
  3652. return I->getParent() != DestBlock;
  3653. return true;
  3654. });
  3655. /// FIXME: We could remove droppable uses that are not dominated by
  3656. /// the new position.
  3657. BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
  3658. I->moveBefore(&*InsertPos);
  3659. ++NumSunkInst;
  3660. // Also sink all related debug uses from the source basic block. Otherwise we
  3661. // get debug use before the def. Attempt to salvage debug uses first, to
  3662. // maximise the range variables have location for. If we cannot salvage, then
  3663. // mark the location undef: we know it was supposed to receive a new location
  3664. // here, but that computation has been sunk.
  3665. SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
  3666. findDbgUsers(DbgUsers, I);
  3667. // Process the sinking DbgUsers in reverse order, as we only want to clone the
  3668. // last appearing debug intrinsic for each given variable.
  3669. SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
  3670. for (DbgVariableIntrinsic *DVI : DbgUsers)
  3671. if (DVI->getParent() == SrcBlock)
  3672. DbgUsersToSink.push_back(DVI);
  3673. llvm::sort(DbgUsersToSink,
  3674. [](auto *A, auto *B) { return B->comesBefore(A); });
  3675. SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
  3676. SmallSet<DebugVariable, 4> SunkVariables;
  3677. for (auto *User : DbgUsersToSink) {
  3678. // A dbg.declare instruction should not be cloned, since there can only be
  3679. // one per variable fragment. It should be left in the original place
  3680. // because the sunk instruction is not an alloca (otherwise we could not be
  3681. // here).
  3682. if (isa<DbgDeclareInst>(User))
  3683. continue;
  3684. DebugVariable DbgUserVariable =
  3685. DebugVariable(User->getVariable(), User->getExpression(),
  3686. User->getDebugLoc()->getInlinedAt());
  3687. if (!SunkVariables.insert(DbgUserVariable).second)
  3688. continue;
  3689. // Leave dbg.assign intrinsics in their original positions and there should
  3690. // be no need to insert a clone.
  3691. if (isa<DbgAssignIntrinsic>(User))
  3692. continue;
  3693. DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
  3694. if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
  3695. DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
  3696. LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
  3697. }
  3698. // Perform salvaging without the clones, then sink the clones.
  3699. if (!DIIClones.empty()) {
  3700. salvageDebugInfoForDbgValues(*I, DbgUsers);
  3701. // The clones are in reverse order of original appearance, reverse again to
  3702. // maintain the original order.
  3703. for (auto &DIIClone : llvm::reverse(DIIClones)) {
  3704. DIIClone->insertBefore(&*InsertPos);
  3705. LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
  3706. }
  3707. }
  3708. return true;
  3709. }
  3710. bool InstCombinerImpl::run() {
  3711. while (!Worklist.isEmpty()) {
  3712. // Walk deferred instructions in reverse order, and push them to the
  3713. // worklist, which means they'll end up popped from the worklist in-order.
  3714. while (Instruction *I = Worklist.popDeferred()) {
  3715. // Check to see if we can DCE the instruction. We do this already here to
  3716. // reduce the number of uses and thus allow other folds to trigger.
  3717. // Note that eraseInstFromFunction() may push additional instructions on
  3718. // the deferred worklist, so this will DCE whole instruction chains.
  3719. if (isInstructionTriviallyDead(I, &TLI)) {
  3720. eraseInstFromFunction(*I);
  3721. ++NumDeadInst;
  3722. continue;
  3723. }
  3724. Worklist.push(I);
  3725. }
  3726. Instruction *I = Worklist.removeOne();
  3727. if (I == nullptr) continue; // skip null values.
  3728. // Check to see if we can DCE the instruction.
  3729. if (isInstructionTriviallyDead(I, &TLI)) {
  3730. eraseInstFromFunction(*I);
  3731. ++NumDeadInst;
  3732. continue;
  3733. }
  3734. if (!DebugCounter::shouldExecute(VisitCounter))
  3735. continue;
  3736. // Instruction isn't dead, see if we can constant propagate it.
  3737. if (!I->use_empty() &&
  3738. (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
  3739. if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
  3740. LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I
  3741. << '\n');
  3742. // Add operands to the worklist.
  3743. replaceInstUsesWith(*I, C);
  3744. ++NumConstProp;
  3745. if (isInstructionTriviallyDead(I, &TLI))
  3746. eraseInstFromFunction(*I);
  3747. MadeIRChange = true;
  3748. continue;
  3749. }
  3750. }
  3751. // See if we can trivially sink this instruction to its user if we can
  3752. // prove that the successor is not executed more frequently than our block.
  3753. // Return the UserBlock if successful.
  3754. auto getOptionalSinkBlockForInst =
  3755. [this](Instruction *I) -> std::optional<BasicBlock *> {
  3756. if (!EnableCodeSinking)
  3757. return std::nullopt;
  3758. BasicBlock *BB = I->getParent();
  3759. BasicBlock *UserParent = nullptr;
  3760. unsigned NumUsers = 0;
  3761. for (auto *U : I->users()) {
  3762. if (U->isDroppable())
  3763. continue;
  3764. if (NumUsers > MaxSinkNumUsers)
  3765. return std::nullopt;
  3766. Instruction *UserInst = cast<Instruction>(U);
  3767. // Special handling for Phi nodes - get the block the use occurs in.
  3768. if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
  3769. for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
  3770. if (PN->getIncomingValue(i) == I) {
  3771. // Bail out if we have uses in different blocks. We don't do any
// sophisticated analysis (i.e. finding the NearestCommonDominator of
  3773. // these use blocks).
  3774. if (UserParent && UserParent != PN->getIncomingBlock(i))
  3775. return std::nullopt;
  3776. UserParent = PN->getIncomingBlock(i);
  3777. }
  3778. }
  3779. assert(UserParent && "expected to find user block!");
  3780. } else {
  3781. if (UserParent && UserParent != UserInst->getParent())
  3782. return std::nullopt;
  3783. UserParent = UserInst->getParent();
  3784. }
// Make sure these checks are done only once; naturally, we do them the
// first time we get the user parent. This saves compile time.
  3787. if (NumUsers == 0) {
  3788. // Try sinking to another block. If that block is unreachable, then do
  3789. // not bother. SimplifyCFG should handle it.
  3790. if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
  3791. return std::nullopt;
  3792. auto *Term = UserParent->getTerminator();
  3793. // See if the user is one of our successors that has only one
  3794. // predecessor, so that we don't have to split the critical edge.
// Another block that we can sink into is one that ends with a
// terminator that does not pass control to another block (such as
  3797. // return or unreachable or resume). In this case:
  3798. // - I dominates the User (by SSA form);
  3799. // - the User will be executed at most once.
  3800. // So sinking I down to User is always profitable or neutral.
  3801. if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
  3802. return std::nullopt;
  3803. assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
  3804. }
  3805. NumUsers++;
  3806. }
  3807. // No user or only has droppable users.
  3808. if (!UserParent)
  3809. return std::nullopt;
  3810. return UserParent;
  3811. };
  3812. auto OptBB = getOptionalSinkBlockForInst(I);
  3813. if (OptBB) {
  3814. auto *UserParent = *OptBB;
  3815. // Okay, the CFG is simple enough, try to sink this instruction.
  3816. if (TryToSinkInstruction(I, UserParent, TLI)) {
  3817. LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
  3818. MadeIRChange = true;
  3819. // We'll add uses of the sunk instruction below, but since
// sinking can expose opportunities for its *operands*, add
// them to the worklist.
  3822. for (Use &U : I->operands())
  3823. if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
  3824. Worklist.push(OpI);
  3825. }
  3826. }
  3827. // Now that we have an instruction, try combining it to simplify it.
  3828. Builder.SetInsertPoint(I);
  3829. Builder.CollectMetadataToCopy(
  3830. I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
  3831. #ifndef NDEBUG
  3832. std::string OrigI;
  3833. #endif
  3834. LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
  3835. LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
  3836. if (Instruction *Result = visit(*I)) {
  3837. ++NumCombined;
  3838. // Should we replace the old instruction with a new one?
  3839. if (Result != I) {
  3840. LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
  3841. << " New = " << *Result << '\n');
  3842. Result->copyMetadata(*I,
  3843. {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
  3844. // Everything uses the new instruction now.
  3845. I->replaceAllUsesWith(Result);
  3846. // Move the name to the new instruction first.
  3847. Result->takeName(I);
  3848. // Insert the new instruction into the basic block...
  3849. BasicBlock *InstParent = I->getParent();
  3850. BasicBlock::iterator InsertPos = I->getIterator();
// Are we replacing a PHI with something that isn't a PHI, or vice versa?
  3852. if (isa<PHINode>(Result) != isa<PHINode>(I)) {
  3853. // We need to fix up the insertion point.
  3854. if (isa<PHINode>(I)) // PHI -> Non-PHI
  3855. InsertPos = InstParent->getFirstInsertionPt();
  3856. else // Non-PHI -> PHI
  3857. InsertPos = InstParent->getFirstNonPHI()->getIterator();
  3858. }
  3859. Result->insertInto(InstParent, InsertPos);
  3860. // Push the new instruction and any users onto the worklist.
  3861. Worklist.pushUsersToWorkList(*Result);
  3862. Worklist.push(Result);
  3863. eraseInstFromFunction(*I);
  3864. } else {
  3865. LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
  3866. << " New = " << *I << '\n');
  3867. // If the instruction was modified, it's possible that it is now dead.
// If so, remove it.
  3869. if (isInstructionTriviallyDead(I, &TLI)) {
  3870. eraseInstFromFunction(*I);
  3871. } else {
  3872. Worklist.pushUsersToWorkList(*I);
  3873. Worklist.push(I);
  3874. }
  3875. }
  3876. MadeIRChange = true;
  3877. }
  3878. }
  3879. Worklist.zap();
  3880. return MadeIRChange;
  3881. }
  3882. // Track the scopes used by !alias.scope and !noalias. In a function, a
  3883. // @llvm.experimental.noalias.scope.decl is only useful if that scope is used
  3884. // by both sets. If not, the declaration of the scope can be safely omitted.
  3885. // The MDNode of the scope can be omitted as well for the instructions that are
  3886. // part of this function. We do not do that at this point, as this might become
// too time consuming.
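// For example (illustrative): if some instruction's !alias.scope list refers to
// scope !7 but no instruction's !noalias list does, then an
// @llvm.experimental.noalias.scope.decl for !7 is dead and can be removed.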
  3888. class AliasScopeTracker {
  3889. SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
  3890. SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
  3891. public:
  3892. void analyse(Instruction *I) {
  3893. // This seems to be faster than checking 'mayReadOrWriteMemory()'.
  3894. if (!I->hasMetadataOtherThanDebugLoc())
  3895. return;
  3896. auto Track = [](Metadata *ScopeList, auto &Container) {
  3897. const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
  3898. if (!MDScopeList || !Container.insert(MDScopeList).second)
  3899. return;
  3900. for (const auto &MDOperand : MDScopeList->operands())
  3901. if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
  3902. Container.insert(MDScope);
  3903. };
  3904. Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
  3905. Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
  3906. }
  3907. bool isNoAliasScopeDeclDead(Instruction *Inst) {
  3908. NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
  3909. if (!Decl)
  3910. return false;
  3911. assert(Decl->use_empty() &&
  3912. "llvm.experimental.noalias.scope.decl in use ?");
  3913. const MDNode *MDSL = Decl->getScopeList();
  3914. assert(MDSL->getNumOperands() == 1 &&
  3915. "llvm.experimental.noalias.scope should refer to a single scope");
  3916. auto &MDOperand = MDSL->getOperand(0);
  3917. if (auto *MD = dyn_cast<MDNode>(MDOperand))
  3918. return !UsedAliasScopesAndLists.contains(MD) ||
  3919. !UsedNoAliasScopesAndLists.contains(MD);
// Not an MDNode? Throw it away.
  3921. return true;
  3922. }
  3923. };
  3924. /// Populate the IC worklist from a function, by walking it in depth-first
  3925. /// order and adding all reachable code to the worklist.
  3926. ///
  3927. /// This has a couple of tricks to make the code faster and more powerful. In
  3928. /// particular, we constant fold and DCE instructions as we go, to avoid adding
  3929. /// them to the worklist (this significantly speeds up instcombine on code where
  3930. /// many instructions are dead or constant). Additionally, if we find a branch
  3931. /// whose condition is a known constant, we only visit the reachable successors.
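/// For example (illustrative): for `br i1 true, label %a, label %b`, only %a is
/// pushed onto the block worklist; the non-terminator instructions in blocks
/// reachable only via %b are removed afterwards as unreachable code.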
  3932. static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
  3933. const TargetLibraryInfo *TLI,
  3934. InstructionWorklist &ICWorklist) {
  3935. bool MadeIRChange = false;
  3936. SmallPtrSet<BasicBlock *, 32> Visited;
  3937. SmallVector<BasicBlock*, 256> Worklist;
  3938. Worklist.push_back(&F.front());
  3939. SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
  3940. DenseMap<Constant *, Constant *> FoldedConstants;
  3941. AliasScopeTracker SeenAliasScopes;
  3942. do {
  3943. BasicBlock *BB = Worklist.pop_back_val();
  3944. // We have now visited this block! If we've already been here, ignore it.
  3945. if (!Visited.insert(BB).second)
  3946. continue;
  3947. for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
  3948. // ConstantProp instruction if trivially constant.
  3949. if (!Inst.use_empty() &&
  3950. (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
  3951. if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
  3952. LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
  3953. << '\n');
  3954. Inst.replaceAllUsesWith(C);
  3955. ++NumConstProp;
  3956. if (isInstructionTriviallyDead(&Inst, TLI))
  3957. Inst.eraseFromParent();
  3958. MadeIRChange = true;
  3959. continue;
  3960. }
  3961. // See if we can constant fold its operands.
  3962. for (Use &U : Inst.operands()) {
  3963. if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
  3964. continue;
  3965. auto *C = cast<Constant>(U);
  3966. Constant *&FoldRes = FoldedConstants[C];
  3967. if (!FoldRes)
  3968. FoldRes = ConstantFoldConstant(C, DL, TLI);
  3969. if (FoldRes != C) {
  3970. LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
  3971. << "\n Old = " << *C
  3972. << "\n New = " << *FoldRes << '\n');
  3973. U = FoldRes;
  3974. MadeIRChange = true;
  3975. }
  3976. }
  3977. // Skip processing debug and pseudo intrinsics in InstCombine. Processing
  3978. // these call instructions consumes non-trivial amount of time and
  3979. // provides no value for the optimization.
  3980. if (!Inst.isDebugOrPseudoInst()) {
  3981. InstrsForInstructionWorklist.push_back(&Inst);
  3982. SeenAliasScopes.analyse(&Inst);
  3983. }
  3984. }
  3985. // Recursively visit successors. If this is a branch or switch on a
  3986. // constant, only visit the reachable successor.
  3987. Instruction *TI = BB->getTerminator();
  3988. if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
  3989. if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
  3990. bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
  3991. BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
  3992. Worklist.push_back(ReachableBB);
  3993. continue;
  3994. }
  3995. } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
  3996. if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
  3997. Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
  3998. continue;
  3999. }
  4000. }
  4001. append_range(Worklist, successors(TI));
  4002. } while (!Worklist.empty());
  4003. // Remove instructions inside unreachable blocks. This prevents the
  4004. // instcombine code from having to deal with some bad special cases, and
  4005. // reduces use counts of instructions.
  4006. for (BasicBlock &BB : F) {
  4007. if (Visited.count(&BB))
  4008. continue;
  4009. unsigned NumDeadInstInBB;
  4010. unsigned NumDeadDbgInstInBB;
  4011. std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
  4012. removeAllNonTerminatorAndEHPadInstructions(&BB);
  4013. MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
  4014. NumDeadInst += NumDeadInstInBB;
  4015. }
  4016. // Once we've found all of the instructions to add to instcombine's worklist,
  4017. // add them in reverse order. This way instcombine will visit from the top
  4018. // of the function down. This jives well with the way that it adds all uses
  4019. // of instructions to the worklist after doing a transformation, thus avoiding
  4020. // some N^2 behavior in pathological cases.
  4021. ICWorklist.reserve(InstrsForInstructionWorklist.size());
  4022. for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
  4023. // DCE instruction if trivially dead. As we iterate in reverse program
  4024. // order here, we will clean up whole chains of dead instructions.
  4025. if (isInstructionTriviallyDead(Inst, TLI) ||
  4026. SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
  4027. ++NumDeadInst;
  4028. LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
  4029. salvageDebugInfo(*Inst);
  4030. Inst->eraseFromParent();
  4031. MadeIRChange = true;
  4032. continue;
  4033. }
  4034. ICWorklist.push(Inst);
  4035. }
  4036. return MadeIRChange;
  4037. }
  4038. static bool combineInstructionsOverFunction(
  4039. Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
  4040. AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
  4041. DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
  4042. ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
  4043. auto &DL = F.getParent()->getDataLayout();
  4044. MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
  4045. /// Builder - This is an IRBuilder that automatically inserts new
  4046. /// instructions into the worklist when they are created.
  4047. IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
  4048. F.getContext(), TargetFolder(DL),
  4049. IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
  4050. Worklist.add(I);
  4051. if (auto *Assume = dyn_cast<AssumeInst>(I))
  4052. AC.registerAssumption(Assume);
  4053. }));
// Lower dbg.declare intrinsics, otherwise their value may be clobbered
  4055. // by instcombiner.
  4056. bool MadeIRChange = false;
  4057. if (ShouldLowerDbgDeclare)
  4058. MadeIRChange = LowerDbgDeclare(F);
  4059. // LowerDbgDeclare calls RemoveRedundantDbgInstrs, but LowerDbgDeclare will
  4060. // almost never return true when running an assignment tracking build. Take
  4061. // this opportunity to do some clean up for assignment tracking builds too.
  4062. if (!MadeIRChange && isAssignmentTrackingEnabled(*F.getParent())) {
  4063. for (auto &BB : F)
  4064. RemoveRedundantDbgInstrs(&BB);
  4065. }
  4066. // Iterate while there is work to do.
  4067. unsigned Iteration = 0;
  4068. while (true) {
  4069. ++NumWorklistIterations;
  4070. ++Iteration;
  4071. if (Iteration > InfiniteLoopDetectionThreshold) {
  4072. report_fatal_error(
  4073. "Instruction Combining seems stuck in an infinite loop after " +
  4074. Twine(InfiniteLoopDetectionThreshold) + " iterations.");
  4075. }
  4076. if (Iteration > MaxIterations) {
  4077. LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
  4078. << " on " << F.getName()
  4079. << " reached; stopping before reaching a fixpoint\n");
  4080. break;
  4081. }
  4082. LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
  4083. << F.getName() << "\n");
  4084. MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
  4085. InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
  4086. ORE, BFI, PSI, DL, LI);
  4087. IC.MaxArraySizeForCombine = MaxArraySize;
  4088. if (!IC.run())
  4089. break;
  4090. MadeIRChange = true;
  4091. }
  4092. return MadeIRChange;
  4093. }
  4094. InstCombinePass::InstCombinePass() : MaxIterations(LimitMaxIterations) {}
  4095. InstCombinePass::InstCombinePass(unsigned MaxIterations)
  4096. : MaxIterations(MaxIterations) {}
  4097. PreservedAnalyses InstCombinePass::run(Function &F,
  4098. FunctionAnalysisManager &AM) {
  4099. auto &AC = AM.getResult<AssumptionAnalysis>(F);
  4100. auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  4101. auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  4102. auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  4103. auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  4104. auto *LI = AM.getCachedResult<LoopAnalysis>(F);
  4105. auto *AA = &AM.getResult<AAManager>(F);
  4106. auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  4107. ProfileSummaryInfo *PSI =
  4108. MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  4109. auto *BFI = (PSI && PSI->hasProfileSummary()) ?
  4110. &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
  4111. if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
  4112. BFI, PSI, MaxIterations, LI))
  4113. // No changes, all analyses are preserved.
  4114. return PreservedAnalyses::all();
  4115. // Mark all the analyses that instcombine updates as preserved.
  4116. PreservedAnalyses PA;
  4117. PA.preserveSet<CFGAnalyses>();
  4118. return PA;
  4119. }
  4120. void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
  4121. AU.setPreservesCFG();
  4122. AU.addRequired<AAResultsWrapperPass>();
  4123. AU.addRequired<AssumptionCacheTracker>();
  4124. AU.addRequired<TargetLibraryInfoWrapperPass>();
  4125. AU.addRequired<TargetTransformInfoWrapperPass>();
  4126. AU.addRequired<DominatorTreeWrapperPass>();
  4127. AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  4128. AU.addPreserved<DominatorTreeWrapperPass>();
  4129. AU.addPreserved<AAResultsWrapperPass>();
  4130. AU.addPreserved<BasicAAWrapperPass>();
  4131. AU.addPreserved<GlobalsAAWrapperPass>();
  4132. AU.addRequired<ProfileSummaryInfoWrapperPass>();
  4133. LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
  4134. }
  4135. bool InstructionCombiningPass::runOnFunction(Function &F) {
  4136. if (skipFunction(F))
  4137. return false;
  4138. // Required analyses.
  4139. auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  4140. auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  4141. auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  4142. auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  4143. auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  4144. auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  4145. // Optional analyses.
  4146. auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
  4147. auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
  4148. ProfileSummaryInfo *PSI =
  4149. &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  4150. BlockFrequencyInfo *BFI =
  4151. (PSI && PSI->hasProfileSummary()) ?
  4152. &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
  4153. nullptr;
  4154. return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
  4155. BFI, PSI, MaxIterations, LI);
  4156. }
  4157. char InstructionCombiningPass::ID = 0;
  4158. InstructionCombiningPass::InstructionCombiningPass()
  4159. : FunctionPass(ID), MaxIterations(InstCombineDefaultMaxIterations) {
  4160. initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
  4161. }
  4162. InstructionCombiningPass::InstructionCombiningPass(unsigned MaxIterations)
  4163. : FunctionPass(ID), MaxIterations(MaxIterations) {
  4164. initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
  4165. }
  4166. INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
  4167. "Combine redundant instructions", false, false)
  4168. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  4169. INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
  4170. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  4171. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  4172. INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  4173. INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  4174. INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
  4175. INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
  4176. INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
  4177. INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
  4178. "Combine redundant instructions", false, false)
  4179. // Initialization Routines
  4180. void llvm::initializeInstCombine(PassRegistry &Registry) {
  4181. initializeInstructionCombiningPassPass(Registry);
  4182. }
  4183. void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
  4184. initializeInstructionCombiningPassPass(*unwrap(R));
  4185. }
  4186. FunctionPass *llvm::createInstructionCombiningPass() {
  4187. return new InstructionCombiningPass();
  4188. }
  4189. FunctionPass *llvm::createInstructionCombiningPass(unsigned MaxIterations) {
  4190. return new InstructionCombiningPass(MaxIterations);
  4191. }
  4192. void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
  4193. unwrap(PM)->add(createInstructionCombiningPass());
  4194. }