InstructionCombining.cpp

//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
// instructions. This pass does not modify the CFG. This pass is where
// algebraic simplification happens.
//
// This pass combines things like:
//    %Y = add i32 %X, 1
//    %Z = add i32 %Y, 1
// into:
//    %Z = add i32 %X, 2
//
// This is a simple worklist driven algorithm.
//
// This pass guarantees that the following canonicalizations are performed on
// the program:
//    1. If a binary operator has a constant operand, it is moved to the RHS
//    2. Bitwise operators with constant operands are always grouped so that
//       shifts are performed first, then or's, then and's, then xor's.
//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
//    4. All cmp instructions on boolean values are replaced with logical ops
//    5. add X, X is represented as (X*2) => (X << 1)
//    6. Multiplies with a power-of-two constant argument are transformed into
//       shifts.
//   ... etc.
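//
//   For instance, canonicalization #6 above rewrites a power-of-two multiply
//   into a shift (illustrative IR, not an excerpt from this file or its tests):
//       %r = mul i32 %X, 8
//   becomes:
//       %r = shl i32 %X, 3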
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/InstCombine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"

using namespace llvm;
using namespace llvm::PatternMatch;
STATISTIC(NumWorklistIterations,
          "Number of instruction combining iterations performed");

STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");
STATISTIC(NumExpand,    "Number of expansions");
STATISTIC(NumFactor ,   "Number of factorizations");
STATISTIC(NumReassoc ,  "Number of reassociations");
DEBUG_COUNTER(VisitCounter, "instcombine-visit",
              "Controls which instructions are visited");

// FIXME: these limits eventually should be as low as 2.
static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
#ifndef NDEBUG
static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
#else
static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
#endif

static cl::opt<bool>
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
                  cl::init(true));

static cl::opt<unsigned> LimitMaxIterations(
    "instcombine-max-iterations",
    cl::desc("Limit the maximum number of instruction combining iterations"),
    cl::init(InstCombineDefaultMaxIterations));

static cl::opt<unsigned> InfiniteLoopDetectionThreshold(
    "instcombine-infinite-loop-threshold",
    cl::desc("Number of instruction combining iterations considered an "
             "infinite loop"),
    cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden);

static cl::opt<unsigned>
MaxArraySize("instcombine-maxarray-size", cl::init(1024),
             cl::desc("Maximum array size considered when doing a combine"));

// FIXME: Remove this flag when it is no longer necessary to convert
// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
// increases variable availability at the cost of accuracy. Variables that
// cannot be promoted by mem2reg or SROA will be described as living in memory
// for their entire lifetime. However, passes like DSE and instcombine can
// delete stores to the alloca, leading to misleading and inaccurate debug
// information. This flag can be removed when those passes are fixed.
static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
                                               cl::Hidden, cl::init(true));
Optional<Instruction *>
InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
  // Handle target specific intrinsics
  if (II.getCalledFunction()->isTargetIntrinsic()) {
    return TTI.instCombineIntrinsic(*this, II);
  }
  return None;
}

Optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
    IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
    bool &KnownBitsComputed) {
  // Handle target specific intrinsics
  if (II.getCalledFunction()->isTargetIntrinsic()) {
    return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
                                                KnownBitsComputed);
  }
  return None;
}

Optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
    IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
    APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) {
  // Handle target specific intrinsics
  if (II.getCalledFunction()->isTargetIntrinsic()) {
    return TTI.simplifyDemandedVectorEltsIntrinsic(
        *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }
  return None;
}

Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
  return llvm::EmitGEPOffset(&Builder, DL, GEP);
}
/// Legal integers and common types are considered desirable. This is used to
/// avoid creating instructions with types that may not be supported well by
/// the backend.
/// NOTE: This treats i8, i16 and i32 specially because they are common
/// types in frontend languages.
bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
  switch (BitWidth) {
  case 8:
  case 16:
  case 32:
    return true;
  default:
    return DL.isLegalInteger(BitWidth);
  }
}

/// Return true if it is desirable to convert an integer computation from a
/// given bit width to a new bit width.
/// We don't want to convert from a legal to an illegal type or from a smaller
/// to a larger illegal type. A width of '1' is always treated as a desirable
/// type because i1 is a fundamental type in IR, and there are many specialized
/// optimizations for i1 types. Common/desirable widths are equally treated as
/// legal to convert to, in order to open up more combining opportunities.
bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
                                        unsigned ToWidth) const {
  bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
  bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);

  // Convert to desirable widths even if they are not legal types.
  // Only shrink types, to prevent infinite loops.
  if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
    return true;

  // If this is a legal integer from type, and the result would be an illegal
  // type, don't do the transformation.
  if (FromLegal && !ToLegal)
    return false;

  // Otherwise, if both are illegal, do not increase the size of the result. We
  // do allow things like i160 -> i64, but not i64 -> i160.
  if (!FromLegal && !ToLegal && ToWidth > FromWidth)
    return false;

  return true;
}
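
// Illustrative outcomes of the width checks above, assuming a DataLayout in
// which i8/i16/i32/i64 are the legal integer widths (the DataLayout is an
// assumption for the example, not taken from this file):
//   shouldChangeType(64, 32) -> true   (shrinking to a desirable width)
//   shouldChangeType(32, 17) -> false  (legal source, illegal destination)
//   shouldChangeType(17, 33) -> false  (both illegal and the result grows)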
/// Return true if it is desirable to convert a computation from 'From' to 'To'.
/// We don't want to convert from a legal to an illegal type or from a smaller
/// to a larger illegal type. i1 is always treated as a legal type because it is
/// a fundamental type in IR, and there are many specialized optimizations for
/// i1 types.
bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
  // TODO: This could be extended to allow vectors. Datalayout changes might be
  // needed to properly support that.
  if (!From->isIntegerTy() || !To->isIntegerTy())
    return false;

  unsigned FromWidth = From->getPrimitiveSizeInBits();
  unsigned ToWidth = To->getPrimitiveSizeInBits();
  return shouldChangeType(FromWidth, ToWidth);
}

// Return true if No Signed Wrap should be maintained for I.
// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
// where both B and C should be ConstantInts, results in a constant that does
// not overflow. This function only handles the Add and Sub opcodes. For
// all other opcodes, the function conservatively returns false.
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  if (!OBO || !OBO->hasNoSignedWrap())
    return false;

  // We reason about Add and Sub only.
  Instruction::BinaryOps Opcode = I.getOpcode();
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
    return false;

  const APInt *BVal, *CVal;
  if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
    return false;

  bool Overflow = false;
  if (Opcode == Instruction::Add)
    (void)BVal->sadd_ov(*CVal, Overflow);
  else
    (void)BVal->ssub_ov(*CVal, Overflow);

  return !Overflow;
}
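
// Worked example for the check above (constants chosen purely for
// illustration): reassociating an i8 "add nsw" with B = 100 and C = 20 gives
// 100 + 20 = 120, which fits in a signed i8, so nsw can be kept; with C = 50,
// 100 + 50 = 150 overflows and the flag must be dropped.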
static bool hasNoUnsignedWrap(BinaryOperator &I) {
  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  return OBO && OBO->hasNoUnsignedWrap();
}

static bool hasNoSignedWrap(BinaryOperator &I) {
  auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
  return OBO && OBO->hasNoSignedWrap();
}
/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. We preserve fast-math flags when applicable.
static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
  FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
  if (!FPMO) {
    I.clearSubclassOptionalData();
    return;
  }

  FastMathFlags FMF = I.getFastMathFlags();
  I.clearSubclassOptionalData();
  I.setFastMathFlags(FMF);
}
/// Combine constant operands of associative operations either before or after a
/// cast to eliminate one of the associative operations:
/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
                                   InstCombinerImpl &IC) {
  auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
  if (!Cast || !Cast->hasOneUse())
    return false;

  // TODO: Enhance logic for other casts and remove this check.
  auto CastOpcode = Cast->getOpcode();
  if (CastOpcode != Instruction::ZExt)
    return false;

  // TODO: Enhance logic for other BinOps and remove this check.
  if (!BinOp1->isBitwiseLogicOp())
    return false;

  auto AssocOpcode = BinOp1->getOpcode();
  auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
  if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
    return false;

  Constant *C1, *C2;
  if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
      !match(BinOp2->getOperand(1), m_Constant(C2)))
    return false;

  // TODO: This assumes a zext cast.
  // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
  // to the destination type might lose bits.

  // Fold the constants together in the destination type:
  // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
  Type *DestTy = C1->getType();
  Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
  Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
  IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
  IC.replaceOperand(*BinOp1, 1, FoldedC);
  return true;
}
// Simplifies IntToPtr/PtrToInt RoundTrip Cast To BitCast.
// inttoptr ( ptrtoint (x) ) --> x
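// Illustrative IR for this round trip (assuming 64-bit pointers and a matching
// integer width; the example is hypothetical, not from a test):
//   %i = ptrtoint i8* %p to i64
//   %q = inttoptr i64 %i to i8*
// is replaced by a bit-or-pointer cast of %p.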
Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
  auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
  if (IntToPtr && DL.getPointerTypeSizeInBits(IntToPtr->getDestTy()) ==
                      DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
    auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
    Type *CastTy = IntToPtr->getDestTy();
    if (PtrToInt &&
        CastTy->getPointerAddressSpace() ==
            PtrToInt->getSrcTy()->getPointerAddressSpace() &&
        DL.getPointerTypeSizeInBits(PtrToInt->getSrcTy()) ==
            DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
      return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
                                              "", PtrToInt);
    }
  }
  return nullptr;
}
/// This performs a few simplifications for operators that are associative or
/// commutative:
///
/// Commutative operators:
///
/// 1. Order operands such that they are listed from right (least complex) to
///    left (most complex). This puts constants before unary operators before
///    binary operators.
///
/// Associative operators:
///
/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
///
/// Associative and commutative operators:
///
/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
///    if C1 and C2 are constants.
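///
/// A concrete instance of transform #2 (illustrative values only): for
/// "(X + 5) + 7", "B op C" is "5 + 7", which folds to 12, so the whole
/// expression is rewritten to "X + 12".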
bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
  Instruction::BinaryOps Opcode = I.getOpcode();
  bool Changed = false;

  do {
    // Order operands such that they are listed from right (least complex) to
    // left (most complex). This puts constants before unary operators before
    // binary operators.
    if (I.isCommutative() && getComplexity(I.getOperand(0)) <
        getComplexity(I.getOperand(1)))
      Changed = !I.swapOperands();

    BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
    BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));

    if (I.isAssociative()) {
      // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "B op C" simplify?
        if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "A op V".
          replaceOperand(I, 0, A);
          replaceOperand(I, 1, V);
          bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
          bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);

          // Conservatively clear all optional flags since they may not be
          // preserved by the reassociation. Reset nsw/nuw based on the above
          // analysis.
          ClearSubclassDataAfterReassociation(I);

          // Note: this is only valid because SimplifyBinOp doesn't look at
          // the operands to Op0.
          if (IsNUW)
            I.setHasNoUnsignedWrap(true);
          if (IsNSW)
            I.setHasNoSignedWrap(true);

          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "A op B" simplify?
        if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "V op C".
          replaceOperand(I, 0, V);
          replaceOperand(I, 1, C);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }
    }

    if (I.isAssociative() && I.isCommutative()) {
      if (simplifyAssocCastAssoc(&I, *this)) {
        Changed = true;
        ++NumReassoc;
        continue;
      }

      // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "V op B".
          replaceOperand(I, 0, V);
          replaceOperand(I, 1, B);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V. Form "B op V".
          replaceOperand(I, 0, B);
          replaceOperand(I, 1, V);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
      // if C1 and C2 are constants.
      Value *A, *B;
      Constant *C1, *C2;
      if (Op0 && Op1 &&
          Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
          match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
          match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
        bool IsNUW = hasNoUnsignedWrap(I) &&
                     hasNoUnsignedWrap(*Op0) &&
                     hasNoUnsignedWrap(*Op1);

        BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
                                    BinaryOperator::CreateNUW(Opcode, A, B) :
                                    BinaryOperator::Create(Opcode, A, B);

        if (isa<FPMathOperator>(NewBO)) {
          FastMathFlags Flags = I.getFastMathFlags();
          Flags &= Op0->getFastMathFlags();
          Flags &= Op1->getFastMathFlags();
          NewBO->setFastMathFlags(Flags);
        }
        InsertNewInstWith(NewBO, I);
        NewBO->takeName(Op1);
        replaceOperand(I, 0, NewBO);
        replaceOperand(I, 1, ConstantExpr::get(Opcode, C1, C2));
        // Conservatively clear the optional flags, since they may not be
        // preserved by the reassociation.
        ClearSubclassDataAfterReassociation(I);
        if (IsNUW)
          I.setHasNoUnsignedWrap(true);

        Changed = true;
        continue;
      }
    }

    // No further simplifications.
    return Changed;
  } while (true);
}
/// Return whether "X LOp (Y ROp Z)" is always equal to
/// "(X LOp Y) ROp (X LOp Z)".
static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
                                     Instruction::BinaryOps ROp) {
  // X & (Y | Z) <--> (X & Y) | (X & Z)
  // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
  if (LOp == Instruction::And)
    return ROp == Instruction::Or || ROp == Instruction::Xor;

  // X | (Y & Z) <--> (X | Y) & (X | Z)
  if (LOp == Instruction::Or)
    return ROp == Instruction::And;

  // X * (Y + Z) <--> (X * Y) + (X * Z)
  // X * (Y - Z) <--> (X * Y) - (X * Z)
  if (LOp == Instruction::Mul)
    return ROp == Instruction::Add || ROp == Instruction::Sub;

  return false;
}

/// Return whether "(X LOp Y) ROp Z" is always equal to
/// "(X ROp Z) LOp (Y ROp Z)".
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
                                     Instruction::BinaryOps ROp) {
  if (Instruction::isCommutative(ROp))
    return leftDistributesOverRight(ROp, LOp);

  // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
  // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
  // but this requires knowing that the addition does not overflow and other
  // such subtleties.
  return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);
}
/// This function returns the identity value for the given opcode, which can be
/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
  if (isa<Constant>(V))
    return nullptr;

  return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
}
/// This function predicates factorization using distributive laws. By default,
/// it just returns the 'Op' inputs. But for special-cases like
/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
/// allow more factorization opportunities.
static Instruction::BinaryOps
getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
                          Value *&LHS, Value *&RHS) {
  assert(Op && "Expected a binary operator");
  LHS = Op->getOperand(0);
  RHS = Op->getOperand(1);
  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
    Constant *C;
    if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
      // X << C --> X * (1 << C)
      RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
      return Instruction::Mul;
    }
    // TODO: We can add other conversions e.g. shr => div etc.
  }
  return Op->getOpcode();
}
/// This tries to simplify binary operations by factorizing out common terms
/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
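///
/// Illustrative IR for that factorization (hypothetical values, shown only as
/// an example of the pattern above):
///   %t1 = mul i32 %a, %b
///   %t2 = mul i32 %a, %c
///   %r  = add i32 %t1, %t2
/// becomes
///   %s  = add i32 %b, %c
///   %r  = mul i32 %a, %s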
Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
                                          Instruction::BinaryOps InnerOpcode,
                                          Value *A, Value *B, Value *C,
                                          Value *D) {
  assert(A && B && C && D && "All values must be provided");

  Value *V = nullptr;
  Value *SimplifiedInst = nullptr;
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  // Does "X op' Y" always equal "Y op' X"?
  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);

  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode))
    // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
    // commutative case, "(A op' B) op (C op' A)"?
    if (A == C || (InnerCommutative && A == D)) {
      if (A != C)
        std::swap(C, D);
      // Consider forming "A op' (B op D)".
      // If "B op D" simplifies then it can be formed with no cost.
      V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
      // If "B op D" doesn't simplify then only go on if both of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
        V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
      if (V) {
        SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V);
      }
    }

  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
  if (!SimplifiedInst && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
    // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
    // commutative case, "(A op' B) op (B op' D)"?
    if (B == D || (InnerCommutative && B == C)) {
      if (B != D)
        std::swap(C, D);
      // Consider forming "(A op C) op' B".
      // If "A op C" simplifies then it can be formed with no cost.
      V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
      // If "A op C" doesn't simplify then only go on if both of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
        V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
      if (V) {
        SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B);
      }
    }

  if (SimplifiedInst) {
    ++NumFactor;
    SimplifiedInst->takeName(&I);

    // Check if we can add NSW/NUW flags to SimplifiedInst. If so, set them.
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
      if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
        bool HasNSW = false;
        bool HasNUW = false;
        if (isa<OverflowingBinaryOperator>(&I)) {
          HasNSW = I.hasNoSignedWrap();
          HasNUW = I.hasNoUnsignedWrap();
        }

        if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
          HasNSW &= LOBO->hasNoSignedWrap();
          HasNUW &= LOBO->hasNoUnsignedWrap();
        }

        if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
          HasNSW &= ROBO->hasNoSignedWrap();
          HasNUW &= ROBO->hasNoUnsignedWrap();
        }

        if (TopLevelOpcode == Instruction::Add &&
            InnerOpcode == Instruction::Mul) {
          // We can propagate 'nsw' if we know that
          //  %Y = mul nsw i16 %X, C
          //  %Z = add nsw i16 %Y, %X
          // =>
          //  %Z = mul nsw i16 %X, C+1
          //
          // iff C+1 isn't INT_MIN
          const APInt *CInt;
          if (match(V, m_APInt(CInt))) {
            if (!CInt->isMinSignedValue())
              BO->setHasNoSignedWrap(HasNSW);
          }

          // nuw can be propagated with any constant or nuw value.
          BO->setHasNoUnsignedWrap(HasNUW);
        }
      }
    }
  }
  return SimplifiedInst;
}
/// This tries to simplify binary operations which some other binary operation
/// distributes over either by factorizing out common terms
/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
/// simplifications (e.g. "A & (B | C) -> (A&B) | (A&C)" if this is a win).
/// Returns the simplified value, or null if it didn't simplify.
Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  {
    // Factorization.
    Value *A, *B, *C, *D;
    Instruction::BinaryOps LHSOpcode, RHSOpcode;
    if (Op0)
      LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
    if (Op1)
      RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);

    // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
    // a common term.
    if (Op0 && Op1 && LHSOpcode == RHSOpcode)
      if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D))
        return V;

    // The instruction has the form "(A op' B) op (C)". Try to factorize common
    // term.
    if (Op0)
      if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
        if (Value *V = tryFactorization(I, LHSOpcode, A, B, RHS, Ident))
          return V;

    // The instruction has the form "(B) op (C op' D)". Try to factorize common
    // term.
    if (Op1)
      if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
        if (Value *V = tryFactorization(I, RHSOpcode, LHS, Ident, C, D))
          return V;
  }

  // Expansion.
  if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
    // The instruction has the form "(A op' B) op C". See if expanding it out
    // to "(A op C) op' (B op C)" results in simplifications.
    Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
    Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'

    // Disable the use of undef because it's not safe to distribute undef.
    auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
    Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
    Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQDistributive);

    // Do "A op C" and "B op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      C = Builder.CreateBinOp(InnerOpcode, L, R);
      C->takeName(&I);
      return C;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
      // They do! Return "B op C".
      ++NumExpand;
      C = Builder.CreateBinOp(TopLevelOpcode, B, C);
      C->takeName(&I);
      return C;
    }

    // Does "B op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
      // They do! Return "A op C".
      ++NumExpand;
      C = Builder.CreateBinOp(TopLevelOpcode, A, C);
      C->takeName(&I);
      return C;
    }
  }

  if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
    // The instruction has the form "A op (B op' C)". See if expanding it out
    // to "(A op B) op' (A op C)" results in simplifications.
    Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
    Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'

    // Disable the use of undef because it's not safe to distribute undef.
    auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
    Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
    Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive);

    // Do "A op B" and "A op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      A = Builder.CreateBinOp(InnerOpcode, L, R);
      A->takeName(&I);
      return A;
    }

    // Does "A op B" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
      // They do! Return "A op C".
      ++NumExpand;
      A = Builder.CreateBinOp(TopLevelOpcode, A, C);
      A->takeName(&I);
      return A;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
      // They do! Return "A op B".
      ++NumExpand;
      A = Builder.CreateBinOp(TopLevelOpcode, A, B);
      A->takeName(&I);
      return A;
    }
  }

  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
}
Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
                                                        Value *LHS,
                                                        Value *RHS) {
  Value *A, *B, *C, *D, *E, *F;
  bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
  bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
  if (!LHSIsSelect && !RHSIsSelect)
    return nullptr;

  FastMathFlags FMF;
  BuilderTy::FastMathFlagGuard Guard(Builder);
  if (isa<FPMathOperator>(&I)) {
    FMF = I.getFastMathFlags();
    Builder.setFastMathFlags(FMF);
  }

  Instruction::BinaryOps Opcode = I.getOpcode();
  SimplifyQuery Q = SQ.getWithInstruction(&I);

  Value *Cond, *True = nullptr, *False = nullptr;
  if (LHSIsSelect && RHSIsSelect && A == D) {
    // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
    Cond = A;
    True = SimplifyBinOp(Opcode, B, E, FMF, Q);
    False = SimplifyBinOp(Opcode, C, F, FMF, Q);

    if (LHS->hasOneUse() && RHS->hasOneUse()) {
      if (False && !True)
        True = Builder.CreateBinOp(Opcode, B, E);
      else if (True && !False)
        False = Builder.CreateBinOp(Opcode, C, F);
    }
  } else if (LHSIsSelect && LHS->hasOneUse()) {
    // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
    Cond = A;
    True = SimplifyBinOp(Opcode, B, RHS, FMF, Q);
    False = SimplifyBinOp(Opcode, C, RHS, FMF, Q);
  } else if (RHSIsSelect && RHS->hasOneUse()) {
    // X op (D ? E : F) -> D ? (X op E) : (X op F)
    Cond = D;
    True = SimplifyBinOp(Opcode, LHS, E, FMF, Q);
    False = SimplifyBinOp(Opcode, LHS, F, FMF, Q);
  }

  if (!True || !False)
    return nullptr;

  Value *SI = Builder.CreateSelect(Cond, True, False);
  SI->takeName(&I);
  return SI;
}
/// Freely adapt every user of V as if V had been changed to !V.
/// WARNING: only do this if canFreelyInvertAllUsersOf() said it can be done.
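///
/// For example (illustrative, not from a test): if V feeds
/// "select i1 V, %a, %b", swapping the select's arms yields
/// "select i1 V, %b, %a", which computes the original result once V itself is
/// rewritten to !V.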
void InstCombinerImpl::freelyInvertAllUsersOf(Value *I) {
  for (User *U : I->users()) {
    switch (cast<Instruction>(U)->getOpcode()) {
    case Instruction::Select: {
      auto *SI = cast<SelectInst>(U);
      SI->swapValues();
      SI->swapProfMetadata();
      break;
    }
    case Instruction::Br:
      cast<BranchInst>(U)->swapSuccessors(); // swaps prof metadata too
      break;
    case Instruction::Xor:
      replaceInstUsesWith(cast<Instruction>(*U), I);
      break;
    default:
      llvm_unreachable("Got unexpected user - out of sync with "
                       "canFreelyInvertAllUsersOf() ?");
    }
  }
}
/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
/// constant zero (which is the 'negate' form).
Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
  Value *NegV;
  if (match(V, m_Neg(m_Value(NegV))))
    return NegV;

  // Constants can be considered to be negated values if they can be folded.
  if (ConstantInt *C = dyn_cast<ConstantInt>(V))
    return ConstantExpr::getNeg(C);

  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
    if (C->getType()->getElementType()->isIntegerTy())
      return ConstantExpr::getNeg(C);

  if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
    for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
      Constant *Elt = CV->getAggregateElement(i);
      if (!Elt)
        return nullptr;

      if (isa<UndefValue>(Elt))
        continue;

      if (!isa<ConstantInt>(Elt))
        return nullptr;
    }
    return ConstantExpr::getNeg(CV);
  }

  // Negate integer vector splats.
  if (auto *CV = dyn_cast<Constant>(V))
    if (CV->getType()->isVectorTy() &&
        CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
      return ConstantExpr::getNeg(CV);

  return nullptr;
}
/// A binop with a constant operand and a sign-extended boolean operand may be
/// converted into a select of constants by applying the binary operation to
/// the constant with the two possible values of the extended boolean (0 or -1).
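///
/// Illustrative IR for this fold (the constant 42 is chosen only as an
/// example):
///   %s = sext i1 %x to i32
///   %r = and i32 %s, 42
/// becomes
///   %r = select i1 %x, i32 42, i32 0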
Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
  // TODO: Handle non-commutative binop (constant is operand 0).
  // TODO: Handle zext.
  // TODO: Peek through 'not' of cast.
  Value *BO0 = BO.getOperand(0);
  Value *BO1 = BO.getOperand(1);
  Value *X;
  Constant *C;
  if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
      !X->getType()->isIntOrIntVectorTy(1))
    return nullptr;

  // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
  Constant *Ones = ConstantInt::getAllOnesValue(BO.getType());
  Constant *Zero = ConstantInt::getNullValue(BO.getType());
  Constant *TVal = ConstantExpr::get(BO.getOpcode(), Ones, C);
  Constant *FVal = ConstantExpr::get(BO.getOpcode(), Zero, C);
  return SelectInst::Create(X, TVal, FVal);
}
static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                             InstCombiner::BuilderTy &Builder) {
  if (auto *Cast = dyn_cast<CastInst>(&I))
    return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());

  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
    assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) &&
           "Expected constant-foldable intrinsic");
    Intrinsic::ID IID = II->getIntrinsicID();
    if (II->arg_size() == 1)
      return Builder.CreateUnaryIntrinsic(IID, SO);

    // This works for real binary ops like min/max (where we always expect the
    // constant operand to be canonicalized as op1) and unary ops with a bonus
    // constant argument like ctlz/cttz.
    // TODO: Handle non-commutative binary intrinsics as below for binops.
    assert(II->arg_size() == 2 && "Expected binary intrinsic");
    assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand");
    return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
  }

  assert(I.isBinaryOp() && "Unexpected opcode for select folding");

  // Figure out if the constant is the left or the right argument.
  bool ConstIsRHS = isa<Constant>(I.getOperand(1));
  Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));

  if (auto *SOC = dyn_cast<Constant>(SO)) {
    if (ConstIsRHS)
      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
  }

  Value *Op0 = SO, *Op1 = ConstOperand;
  if (!ConstIsRHS)
    std::swap(Op0, Op1);

  Value *NewBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), Op0,
                                     Op1, SO->getName() + ".op");
  if (auto *NewBOI = dyn_cast<Instruction>(NewBO))
    NewBOI->copyIRFlags(&I);
  return NewBO;
}
  913. Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op,
  914. SelectInst *SI) {
  915. // Don't modify shared select instructions.
  916. if (!SI->hasOneUse())
  917. return nullptr;
  918. Value *TV = SI->getTrueValue();
  919. Value *FV = SI->getFalseValue();
  920. if (!(isa<Constant>(TV) || isa<Constant>(FV)))
  921. return nullptr;
  922. // Bool selects with constant operands can be folded to logical ops.
  923. if (SI->getType()->isIntOrIntVectorTy(1))
  924. return nullptr;
  925. // If it's a bitcast involving vectors, make sure it has the same number of
  926. // elements on both sides.
  927. if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
  928. VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
  929. VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
  930. // Verify that either both or neither are vectors.
  931. if ((SrcTy == nullptr) != (DestTy == nullptr))
  932. return nullptr;
  933. // If vectors, verify that they have the same number of elements.
  934. if (SrcTy && SrcTy->getElementCount() != DestTy->getElementCount())
  935. return nullptr;
  936. }
  937. // Test if a CmpInst instruction is used exclusively by a select as
  938. // part of a minimum or maximum operation. If so, refrain from doing
  939. // any other folding. This helps out other analyses which understand
  940. // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
  941. // and CodeGen. And in this case, at least one of the comparison
  942. // operands has at least one user besides the compare (the select),
  943. // which would often largely negate the benefit of folding anyway.
  944. if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
  945. if (CI->hasOneUse()) {
  946. Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
  947. // FIXME: This is a hack to avoid infinite looping with min/max patterns.
  948. // We have to ensure that vector constants that only differ with
  949. // undef elements are treated as equivalent.
  950. auto areLooselyEqual = [](Value *A, Value *B) {
  951. if (A == B)
  952. return true;
  953. // Test for vector constants.
  954. Constant *ConstA, *ConstB;
  955. if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
  956. return false;
  957. // TODO: Deal with FP constants?
  958. if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
  959. return false;
  960. // Compare for equality including undefs as equal.
  961. auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
  962. const APInt *C;
  963. return match(Cmp, m_APIntAllowUndef(C)) && C->isOne();
  964. };
  965. if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
  966. (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
  967. return nullptr;
  968. }
  969. }
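// Fold the operation into both arms of the select. Illustrative example:
// add (select i1 %c, i32 2, i32 7), 5 --> select i1 %c, i32 7, i32 12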
  970. Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
  971. Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
  972. return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
  973. }
  974. static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
  975. InstCombiner::BuilderTy &Builder) {
  976. bool ConstIsRHS = isa<Constant>(I->getOperand(1));
  977. Constant *C = cast<Constant>(I->getOperand(ConstIsRHS));
  978. if (auto *InC = dyn_cast<Constant>(InV)) {
  979. if (ConstIsRHS)
  980. return ConstantExpr::get(I->getOpcode(), InC, C);
  981. return ConstantExpr::get(I->getOpcode(), C, InC);
  982. }
  983. Value *Op0 = InV, *Op1 = C;
  984. if (!ConstIsRHS)
  985. std::swap(Op0, Op1);
  986. Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phi.bo");
  987. auto *FPInst = dyn_cast<Instruction>(RI);
  988. if (FPInst && isa<FPMathOperator>(FPInst))
  989. FPInst->copyFastMathFlags(I);
  990. return RI;
  991. }
  992. Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
  993. unsigned NumPHIValues = PN->getNumIncomingValues();
  994. if (NumPHIValues == 0)
  995. return nullptr;
  996. // We normally only transform phis with a single use. However, if a PHI has
  997. // multiple uses and they are all the same operation, we can fold *all* of the
  998. // uses into the PHI.
  999. if (!PN->hasOneUse()) {
  1000. // Walk the use list for the instruction, comparing them to I.
  1001. for (User *U : PN->users()) {
  1002. Instruction *UI = cast<Instruction>(U);
  1003. if (UI != &I && !I.isIdenticalTo(UI))
  1004. return nullptr;
  1005. }
  1006. // Otherwise, we can replace *all* users with the new PHI we form.
  1007. }
  1008. // Check to see if all of the operands of the PHI are simple constants
  1009. // (constantint/constantfp/undef). If there is one non-constant value,
  1010. // remember the BB it is in. If there is more than one or if *it* is a PHI,
  1011. // bail out. We don't do arbitrary constant expressions here because moving
  1012. // their computation can be expensive without a cost model.
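// Illustrative example: given %p = phi i32 [ 1, %bb1 ], [ %x, %bb2 ], an
// 'add %p, 2' user can become %p.new = phi i32 [ 3, %bb1 ], [ %x.add, %bb2 ],
// with %x.add = add i32 %x, 2 inserted at the end of %bb2.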
  1013. BasicBlock *NonConstBB = nullptr;
  1014. for (unsigned i = 0; i != NumPHIValues; ++i) {
  1015. Value *InVal = PN->getIncomingValue(i);
1016. // For non-freeze instructions, require a constant operand.
1017. // For freeze, require a non-undef, non-poison operand.
  1018. if (!isa<FreezeInst>(I) && match(InVal, m_ImmConstant()))
  1019. continue;
  1020. if (isa<FreezeInst>(I) && isGuaranteedNotToBeUndefOrPoison(InVal))
  1021. continue;
  1022. if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
  1023. if (NonConstBB) return nullptr; // More than one non-const value.
  1024. NonConstBB = PN->getIncomingBlock(i);
  1025. // If the InVal is an invoke at the end of the pred block, then we can't
  1026. // insert a computation after it without breaking the edge.
  1027. if (isa<InvokeInst>(InVal))
  1028. if (cast<Instruction>(InVal)->getParent() == NonConstBB)
  1029. return nullptr;
  1030. // If the incoming non-constant value is in I's block, we will remove one
  1031. // instruction, but insert another equivalent one, leading to infinite
  1032. // instcombine.
  1033. if (isPotentiallyReachable(I.getParent(), NonConstBB, nullptr, &DT, LI))
  1034. return nullptr;
  1035. }
  1036. // If there is exactly one non-constant value, we can insert a copy of the
  1037. // operation in that block. However, if this is a critical edge, we would be
  1038. // inserting the computation on some other paths (e.g. inside a loop). Only
  1039. // do this if the pred block is unconditionally branching into the phi block.
  1040. // Also, make sure that the pred block is not dead code.
  1041. if (NonConstBB != nullptr) {
  1042. BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
  1043. if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(NonConstBB))
  1044. return nullptr;
  1045. }
  1046. // Okay, we can do the transformation: create the new PHI node.
  1047. PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
  1048. InsertNewInstBefore(NewPN, *PN);
  1049. NewPN->takeName(PN);
  1050. // If we are going to have to insert a new computation, do so right before the
  1051. // predecessor's terminator.
  1052. if (NonConstBB)
  1053. Builder.SetInsertPoint(NonConstBB->getTerminator());
  1054. // Next, add all of the operands to the PHI.
  1055. if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
  1056. // We only currently try to fold the condition of a select when it is a phi,
  1057. // not the true/false values.
  1058. Value *TrueV = SI->getTrueValue();
  1059. Value *FalseV = SI->getFalseValue();
  1060. BasicBlock *PhiTransBB = PN->getParent();
  1061. for (unsigned i = 0; i != NumPHIValues; ++i) {
  1062. BasicBlock *ThisBB = PN->getIncomingBlock(i);
  1063. Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
  1064. Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
  1065. Value *InV = nullptr;
  1066. // Beware of ConstantExpr: it may eventually evaluate to getNullValue,
  1067. // even if currently isNullValue gives false.
  1068. Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
  1069. // For vector constants, we cannot use isNullValue to fold into
  1070. // FalseVInPred versus TrueVInPred. When we have individual nonzero
  1071. // elements in the vector, we will incorrectly fold InC to
  1072. // `TrueVInPred`.
  1073. if (InC && isa<ConstantInt>(InC))
  1074. InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
  1075. else {
  1076. // Generate the select in the same block as PN's current incoming block.
  1077. // Note: ThisBB need not be the NonConstBB because vector constants
  1078. // which are constants by definition are handled here.
  1079. // FIXME: This can lead to an increase in IR generation because we might
1080. // generate selects for a vector constant phi operand that could not be
  1081. // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For
  1082. // non-vector phis, this transformation was always profitable because
  1083. // the select would be generated exactly once in the NonConstBB.
  1084. Builder.SetInsertPoint(ThisBB->getTerminator());
  1085. InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred,
  1086. FalseVInPred, "phi.sel");
  1087. }
  1088. NewPN->addIncoming(InV, ThisBB);
  1089. }
  1090. } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
  1091. Constant *C = cast<Constant>(I.getOperand(1));
  1092. for (unsigned i = 0; i != NumPHIValues; ++i) {
  1093. Value *InV = nullptr;
  1094. if (auto *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
  1095. InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
  1096. else
  1097. InV = Builder.CreateCmp(CI->getPredicate(), PN->getIncomingValue(i),
  1098. C, "phi.cmp");
  1099. NewPN->addIncoming(InV, PN->getIncomingBlock(i));
  1100. }
  1101. } else if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
  1102. for (unsigned i = 0; i != NumPHIValues; ++i) {
  1103. Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i),
  1104. Builder);
  1105. NewPN->addIncoming(InV, PN->getIncomingBlock(i));
  1106. }
  1107. } else if (isa<FreezeInst>(&I)) {
  1108. for (unsigned i = 0; i != NumPHIValues; ++i) {
  1109. Value *InV;
  1110. if (NonConstBB == PN->getIncomingBlock(i))
  1111. InV = Builder.CreateFreeze(PN->getIncomingValue(i), "phi.fr");
  1112. else
  1113. InV = PN->getIncomingValue(i);
  1114. NewPN->addIncoming(InV, PN->getIncomingBlock(i));
  1115. }
  1116. } else {
  1117. CastInst *CI = cast<CastInst>(&I);
  1118. Type *RetTy = CI->getType();
  1119. for (unsigned i = 0; i != NumPHIValues; ++i) {
  1120. Value *InV;
  1121. if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
  1122. InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
  1123. else
  1124. InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i),
  1125. I.getType(), "phi.cast");
  1126. NewPN->addIncoming(InV, PN->getIncomingBlock(i));
  1127. }
  1128. }
  1129. for (User *U : make_early_inc_range(PN->users())) {
  1130. Instruction *User = cast<Instruction>(U);
  1131. if (User == &I) continue;
  1132. replaceInstUsesWith(*User, NewPN);
  1133. eraseInstFromFunction(*User);
  1134. }
  1135. return replaceInstUsesWith(I, NewPN);
  1136. }
  1137. Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
  1138. // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
  1139. // we are guarding against replicating the binop in >1 predecessor.
  1140. // This could miss matching a phi with 2 constant incoming values.
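// Illustrative example: given %p0 = phi i32 [ 1, %constbb ], [ %x, %otherbb ]
// and %p1 = phi i32 [ 2, %constbb ], [ %y, %otherbb ], 'add %p0, %p1' can be
// replaced by phi i32 [ 3, %constbb ], [ %xy, %otherbb ], where
// %xy = add i32 %x, %y is created in %otherbb (subject to the checks below).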
  1141. auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
  1142. auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
  1143. if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
  1144. Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
  1145. return nullptr;
  1146. // TODO: Remove the restriction for binop being in the same block as the phis.
  1147. if (BO.getParent() != Phi0->getParent() ||
  1148. BO.getParent() != Phi1->getParent())
  1149. return nullptr;
  1150. // Match a pair of incoming constants for one of the predecessor blocks.
  1151. BasicBlock *ConstBB, *OtherBB;
  1152. Constant *C0, *C1;
  1153. if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
  1154. ConstBB = Phi0->getIncomingBlock(0);
  1155. OtherBB = Phi0->getIncomingBlock(1);
  1156. } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
  1157. ConstBB = Phi0->getIncomingBlock(1);
  1158. OtherBB = Phi0->getIncomingBlock(0);
  1159. } else {
  1160. return nullptr;
  1161. }
  1162. if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
  1163. return nullptr;
  1164. // The block that we are hoisting to must reach here unconditionally.
  1165. // Otherwise, we could be speculatively executing an expensive or
1166. // non-speculatable op.
  1167. auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
  1168. if (!PredBlockBranch || PredBlockBranch->isConditional() ||
  1169. !DT.isReachableFromEntry(OtherBB))
  1170. return nullptr;
  1171. // TODO: This check could be tightened to only apply to binops (div/rem) that
  1172. // are not safe to speculatively execute. But that could allow hoisting
  1173. // potentially expensive instructions (fdiv for example).
  1174. for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
  1175. if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
  1176. return nullptr;
  1177. // Make a new binop in the predecessor block with the non-constant incoming
  1178. // values.
  1179. Builder.SetInsertPoint(PredBlockBranch);
  1180. Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
  1181. Phi0->getIncomingValueForBlock(OtherBB),
  1182. Phi1->getIncomingValueForBlock(OtherBB));
  1183. if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
  1184. NotFoldedNewBO->copyIRFlags(&BO);
  1185. // Fold constants for the predecessor block with constant incoming values.
  1186. Constant *NewC = ConstantExpr::get(BO.getOpcode(), C0, C1);
  1187. // Replace the binop with a phi of the new values. The old phis are dead.
  1188. PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
  1189. NewPhi->addIncoming(NewBO, OtherBB);
  1190. NewPhi->addIncoming(NewC, ConstBB);
  1191. return NewPhi;
  1192. }
  1193. Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
  1194. if (!isa<Constant>(I.getOperand(1)))
  1195. return nullptr;
  1196. if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
  1197. if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
  1198. return NewSel;
  1199. } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
  1200. if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
  1201. return NewPhi;
  1202. }
  1203. return nullptr;
  1204. }
  1205. /// Given a pointer type and a constant offset, determine whether or not there
  1206. /// is a sequence of GEP indices into the pointed type that will land us at the
  1207. /// specified offset. If so, fill them into NewIndices and return the resultant
  1208. /// element type, otherwise return null.
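/// Illustrative example (assuming a typical DataLayout): for a pointer to
/// { i32, [2 x i16] } and offset 6, the indices 0, 1, 1 are produced and the
/// element type reached is i16.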
  1209. static Type *findElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
  1210. SmallVectorImpl<Value *> &NewIndices,
  1211. const DataLayout &DL) {
  1212. // Only used by visitGEPOfBitcast(), which is skipped for opaque pointers.
  1213. Type *Ty = PtrTy->getNonOpaquePointerElementType();
  1214. if (!Ty->isSized())
  1215. return nullptr;
  1216. APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset);
  1217. SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
  1218. if (!Offset.isZero())
  1219. return nullptr;
  1220. for (const APInt &Index : Indices)
  1221. NewIndices.push_back(ConstantInt::get(PtrTy->getContext(), Index));
  1222. return Ty;
  1223. }
  1224. static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
  1225. // If this GEP has only 0 indices, it is the same pointer as
  1226. // Src. If Src is not a trivial GEP too, don't combine
  1227. // the indices.
  1228. if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
  1229. !Src.hasOneUse())
  1230. return false;
  1231. return true;
  1232. }
  1233. /// Return a value X such that Val = X * Scale, or null if none.
  1234. /// If the multiplication is known not to overflow, then NoSignedWrap is set.
  1235. Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
  1236. assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
  1237. assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
  1238. Scale.getBitWidth() && "Scale not compatible with value!");
  1239. // If Val is zero or Scale is one then Val = Val * Scale.
  1240. if (match(Val, m_Zero()) || Scale == 1) {
  1241. NoSignedWrap = true;
  1242. return Val;
  1243. }
  1244. // If Scale is zero then it does not divide Val.
  1245. if (Scale.isMinValue())
  1246. return nullptr;
  1247. // Look through chains of multiplications, searching for a constant that is
  1248. // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
  1249. // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
  1250. // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
  1251. // down from Val:
  1252. //
  1253. // Val = M1 * X || Analysis starts here and works down
  1254. // M1 = M2 * Y || Doesn't descend into terms with more
  1255. // M2 = Z * 4 \/ than one use
  1256. //
  1257. // Then to modify a term at the bottom:
  1258. //
  1259. // Val = M1 * X
  1260. // M1 = Z * Y || Replaced M2 with Z
  1261. //
  1262. // Then to work back up correcting nsw flags.
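// For example (illustrative): descaling '%v = shl i32 %x, 3' by a scale of 4
// rewrites it to 'shl i32 %x, 1', since (%x * 8) / 4 == %x * 2.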
  1263. // Op - the term we are currently analyzing. Starts at Val then drills down.
  1264. // Replaced with its descaled value before exiting from the drill down loop.
  1265. Value *Op = Val;
  1266. // Parent - initially null, but after drilling down notes where Op came from.
  1267. // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
  1268. // 0'th operand of Val.
  1269. std::pair<Instruction *, unsigned> Parent;
  1270. // Set if the transform requires a descaling at deeper levels that doesn't
  1271. // overflow.
  1272. bool RequireNoSignedWrap = false;
  1273. // Log base 2 of the scale. Negative if not a power of 2.
  1274. int32_t logScale = Scale.exactLogBase2();
  1275. for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
  1276. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
  1277. // If Op is a constant divisible by Scale then descale to the quotient.
  1278. APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
  1279. APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
  1280. if (!Remainder.isMinValue())
  1281. // Not divisible by Scale.
  1282. return nullptr;
  1283. // Replace with the quotient in the parent.
  1284. Op = ConstantInt::get(CI->getType(), Quotient);
  1285. NoSignedWrap = true;
  1286. break;
  1287. }
  1288. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
  1289. if (BO->getOpcode() == Instruction::Mul) {
  1290. // Multiplication.
  1291. NoSignedWrap = BO->hasNoSignedWrap();
  1292. if (RequireNoSignedWrap && !NoSignedWrap)
  1293. return nullptr;
  1294. // There are three cases for multiplication: multiplication by exactly
  1295. // the scale, multiplication by a constant different to the scale, and
  1296. // multiplication by something else.
  1297. Value *LHS = BO->getOperand(0);
  1298. Value *RHS = BO->getOperand(1);
  1299. if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
  1300. // Multiplication by a constant.
  1301. if (CI->getValue() == Scale) {
  1302. // Multiplication by exactly the scale, replace the multiplication
  1303. // by its left-hand side in the parent.
  1304. Op = LHS;
  1305. break;
  1306. }
  1307. // Otherwise drill down into the constant.
  1308. if (!Op->hasOneUse())
  1309. return nullptr;
  1310. Parent = std::make_pair(BO, 1);
  1311. continue;
  1312. }
  1313. // Multiplication by something else. Drill down into the left-hand side
  1314. // since that's where the reassociate pass puts the good stuff.
  1315. if (!Op->hasOneUse())
  1316. return nullptr;
  1317. Parent = std::make_pair(BO, 0);
  1318. continue;
  1319. }
  1320. if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
  1321. isa<ConstantInt>(BO->getOperand(1))) {
  1322. // Multiplication by a power of 2.
  1323. NoSignedWrap = BO->hasNoSignedWrap();
  1324. if (RequireNoSignedWrap && !NoSignedWrap)
  1325. return nullptr;
  1326. Value *LHS = BO->getOperand(0);
  1327. int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
  1328. getLimitedValue(Scale.getBitWidth());
  1329. // Op = LHS << Amt.
  1330. if (Amt == logScale) {
  1331. // Multiplication by exactly the scale, replace the multiplication
  1332. // by its left-hand side in the parent.
  1333. Op = LHS;
  1334. break;
  1335. }
  1336. if (Amt < logScale || !Op->hasOneUse())
  1337. return nullptr;
  1338. // Multiplication by more than the scale. Reduce the multiplying amount
  1339. // by the scale in the parent.
  1340. Parent = std::make_pair(BO, 1);
  1341. Op = ConstantInt::get(BO->getType(), Amt - logScale);
  1342. break;
  1343. }
  1344. }
  1345. if (!Op->hasOneUse())
  1346. return nullptr;
  1347. if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
  1348. if (Cast->getOpcode() == Instruction::SExt) {
  1349. // Op is sign-extended from a smaller type, descale in the smaller type.
  1350. unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
  1351. APInt SmallScale = Scale.trunc(SmallSize);
  1352. // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
  1353. // descale Op as (sext Y) * Scale. In order to have
  1354. // sext (Y * SmallScale) = (sext Y) * Scale
  1355. // some conditions need to hold however: SmallScale must sign-extend to
  1356. // Scale and the multiplication Y * SmallScale should not overflow.
  1357. if (SmallScale.sext(Scale.getBitWidth()) != Scale)
  1358. // SmallScale does not sign-extend to Scale.
  1359. return nullptr;
  1360. assert(SmallScale.exactLogBase2() == logScale);
  1361. // Require that Y * SmallScale must not overflow.
  1362. RequireNoSignedWrap = true;
  1363. // Drill down through the cast.
  1364. Parent = std::make_pair(Cast, 0);
  1365. Scale = SmallScale;
  1366. continue;
  1367. }
  1368. if (Cast->getOpcode() == Instruction::Trunc) {
  1369. // Op is truncated from a larger type, descale in the larger type.
  1370. // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
  1371. // trunc (Y * sext Scale) = (trunc Y) * Scale
  1372. // always holds. However (trunc Y) * Scale may overflow even if
  1373. // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
  1374. // from this point up in the expression (see later).
  1375. if (RequireNoSignedWrap)
  1376. return nullptr;
  1377. // Drill down through the cast.
  1378. unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
  1379. Parent = std::make_pair(Cast, 0);
  1380. Scale = Scale.sext(LargeSize);
  1381. if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
  1382. logScale = -1;
  1383. assert(Scale.exactLogBase2() == logScale);
  1384. continue;
  1385. }
  1386. }
  1387. // Unsupported expression, bail out.
  1388. return nullptr;
  1389. }
  1390. // If Op is zero then Val = Op * Scale.
  1391. if (match(Op, m_Zero())) {
  1392. NoSignedWrap = true;
  1393. return Op;
  1394. }
  1395. // We know that we can successfully descale, so from here on we can safely
  1396. // modify the IR. Op holds the descaled version of the deepest term in the
  1397. // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
  1398. // not to overflow.
  1399. if (!Parent.first)
  1400. // The expression only had one term.
  1401. return Op;
  1402. // Rewrite the parent using the descaled version of its operand.
  1403. assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
  1404. assert(Op != Parent.first->getOperand(Parent.second) &&
  1405. "Descaling was a no-op?");
  1406. replaceOperand(*Parent.first, Parent.second, Op);
  1407. Worklist.push(Parent.first);
  1408. // Now work back up the expression correcting nsw flags. The logic is based
  1409. // on the following observation: if X * Y is known not to overflow as a signed
  1410. // multiplication, and Y is replaced by a value Z with smaller absolute value,
  1411. // then X * Z will not overflow as a signed multiplication either. As we work
  1412. // our way up, having NoSignedWrap 'true' means that the descaled value at the
  1413. // current level has strictly smaller absolute value than the original.
  1414. Instruction *Ancestor = Parent.first;
  1415. do {
  1416. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
  1417. // If the multiplication wasn't nsw then we can't say anything about the
  1418. // value of the descaled multiplication, and we have to clear nsw flags
  1419. // from this point on up.
  1420. bool OpNoSignedWrap = BO->hasNoSignedWrap();
  1421. NoSignedWrap &= OpNoSignedWrap;
  1422. if (NoSignedWrap != OpNoSignedWrap) {
  1423. BO->setHasNoSignedWrap(NoSignedWrap);
  1424. Worklist.push(Ancestor);
  1425. }
  1426. } else if (Ancestor->getOpcode() == Instruction::Trunc) {
  1427. // The fact that the descaled input to the trunc has smaller absolute
  1428. // value than the original input doesn't tell us anything useful about
  1429. // the absolute values of the truncations.
  1430. NoSignedWrap = false;
  1431. }
  1432. assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
  1433. "Failed to keep proper track of nsw flags while drilling down?");
  1434. if (Ancestor == Val)
  1435. // Got to the top, all done!
  1436. return Val;
  1437. // Move up one level in the expression.
  1438. assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
  1439. Ancestor = Ancestor->user_back();
  1440. } while (true);
  1441. }
  1442. Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
  1443. if (!isa<VectorType>(Inst.getType()))
  1444. return nullptr;
  1445. BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
  1446. Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
  1447. assert(cast<VectorType>(LHS->getType())->getElementCount() ==
  1448. cast<VectorType>(Inst.getType())->getElementCount());
  1449. assert(cast<VectorType>(RHS->getType())->getElementCount() ==
  1450. cast<VectorType>(Inst.getType())->getElementCount());
  1451. // If both operands of the binop are vector concatenations, then perform the
  1452. // narrow binop on each pair of the source operands followed by concatenation
  1453. // of the results.
  1454. Value *L0, *L1, *R0, *R1;
  1455. ArrayRef<int> Mask;
  1456. if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
  1457. match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
  1458. LHS->hasOneUse() && RHS->hasOneUse() &&
  1459. cast<ShuffleVectorInst>(LHS)->isConcat() &&
  1460. cast<ShuffleVectorInst>(RHS)->isConcat()) {
  1461. // This transform does not have the speculative execution constraint as
  1462. // below because the shuffle is a concatenation. The new binops are
  1463. // operating on exactly the same elements as the existing binop.
  1464. // TODO: We could ease the mask requirement to allow different undef lanes,
  1465. // but that requires an analysis of the binop-with-undef output value.
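// Illustrative example: with LHS = shuffle A, B, <0,1,2,3> and
// RHS = shuffle C, D, <0,1,2,3> (both concatenations of <2 x i32> halves),
// add LHS, RHS --> shuffle (add A, C), (add B, D), <0,1,2,3>.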
  1466. Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
  1467. if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
  1468. BO->copyIRFlags(&Inst);
  1469. Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
  1470. if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
  1471. BO->copyIRFlags(&Inst);
  1472. return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
  1473. }
  1474. // It may not be safe to reorder shuffles and things like div, urem, etc.
  1475. // because we may trap when executing those ops on unknown vector elements.
  1476. // See PR20059.
  1477. if (!isSafeToSpeculativelyExecute(&Inst))
  1478. return nullptr;
  1479. auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
  1480. Value *XY = Builder.CreateBinOp(Opcode, X, Y);
  1481. if (auto *BO = dyn_cast<BinaryOperator>(XY))
  1482. BO->copyIRFlags(&Inst);
  1483. return new ShuffleVectorInst(XY, M);
  1484. };
  1485. // If both arguments of the binary operation are shuffles that use the same
  1486. // mask and shuffle within a single vector, move the shuffle after the binop.
  1487. Value *V1, *V2;
  1488. if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
  1489. match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
  1490. V1->getType() == V2->getType() &&
  1491. (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
  1492. // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
  1493. return createBinOpShuffle(V1, V2, Mask);
  1494. }
  1495. // If both arguments of a commutative binop are select-shuffles that use the
  1496. // same mask with commuted operands, the shuffles are unnecessary.
  1497. if (Inst.isCommutative() &&
  1498. match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
  1499. match(RHS,
  1500. m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
  1501. auto *LShuf = cast<ShuffleVectorInst>(LHS);
  1502. auto *RShuf = cast<ShuffleVectorInst>(RHS);
  1503. // TODO: Allow shuffles that contain undefs in the mask?
  1504. // That is legal, but it reduces undef knowledge.
  1505. // TODO: Allow arbitrary shuffles by shuffling after binop?
  1506. // That might be legal, but we have to deal with poison.
  1507. if (LShuf->isSelect() &&
  1508. !is_contained(LShuf->getShuffleMask(), UndefMaskElem) &&
  1509. RShuf->isSelect() &&
  1510. !is_contained(RShuf->getShuffleMask(), UndefMaskElem)) {
  1511. // Example:
  1512. // LHS = shuffle V1, V2, <0, 5, 6, 3>
  1513. // RHS = shuffle V2, V1, <0, 5, 6, 3>
  1514. // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
  1515. Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
  1516. NewBO->copyIRFlags(&Inst);
  1517. return NewBO;
  1518. }
  1519. }
  1520. // If one argument is a shuffle within one vector and the other is a constant,
  1521. // try moving the shuffle after the binary operation. This canonicalization
  1522. // intends to move shuffles closer to other shuffles and binops closer to
  1523. // other binops, so they can be folded. It may also enable demanded elements
  1524. // transforms.
  1525. Constant *C;
  1526. auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
  1527. if (InstVTy &&
  1528. match(&Inst,
  1529. m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
  1530. m_ImmConstant(C))) &&
  1531. cast<FixedVectorType>(V1->getType())->getNumElements() <=
  1532. InstVTy->getNumElements()) {
  1533. assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
  1534. "Shuffle should not change scalar type");
  1535. // Find constant NewC that has property:
  1536. // shuffle(NewC, ShMask) = C
1537. // If such a constant does not exist (example: ShMask=<0,0> and C=<1,2>)
  1538. // reorder is not possible. A 1-to-1 mapping is not required. Example:
  1539. // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
  1540. bool ConstOp1 = isa<Constant>(RHS);
  1541. ArrayRef<int> ShMask = Mask;
  1542. unsigned SrcVecNumElts =
  1543. cast<FixedVectorType>(V1->getType())->getNumElements();
  1544. UndefValue *UndefScalar = UndefValue::get(C->getType()->getScalarType());
  1545. SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, UndefScalar);
  1546. bool MayChange = true;
  1547. unsigned NumElts = InstVTy->getNumElements();
  1548. for (unsigned I = 0; I < NumElts; ++I) {
  1549. Constant *CElt = C->getAggregateElement(I);
  1550. if (ShMask[I] >= 0) {
  1551. assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
  1552. Constant *NewCElt = NewVecC[ShMask[I]];
  1553. // Bail out if:
  1554. // 1. The constant vector contains a constant expression.
  1555. // 2. The shuffle needs an element of the constant vector that can't
  1556. // be mapped to a new constant vector.
  1557. // 3. This is a widening shuffle that copies elements of V1 into the
  1558. // extended elements (extending with undef is allowed).
  1559. if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt) ||
  1560. I >= SrcVecNumElts) {
  1561. MayChange = false;
  1562. break;
  1563. }
  1564. NewVecC[ShMask[I]] = CElt;
  1565. }
  1566. // If this is a widening shuffle, we must be able to extend with undef
  1567. // elements. If the original binop does not produce an undef in the high
  1568. // lanes, then this transform is not safe.
  1569. // Similarly for undef lanes due to the shuffle mask, we can only
  1570. // transform binops that preserve undef.
  1571. // TODO: We could shuffle those non-undef constant values into the
  1572. // result by using a constant vector (rather than an undef vector)
  1573. // as operand 1 of the new binop, but that might be too aggressive
  1574. // for target-independent shuffle creation.
  1575. if (I >= SrcVecNumElts || ShMask[I] < 0) {
  1576. Constant *MaybeUndef =
  1577. ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
  1578. : ConstantExpr::get(Opcode, CElt, UndefScalar);
  1579. if (!match(MaybeUndef, m_Undef())) {
  1580. MayChange = false;
  1581. break;
  1582. }
  1583. }
  1584. }
  1585. if (MayChange) {
  1586. Constant *NewC = ConstantVector::get(NewVecC);
  1587. // It may not be safe to execute a binop on a vector with undef elements
  1588. // because the entire instruction can be folded to undef or create poison
  1589. // that did not exist in the original code.
  1590. if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
  1591. NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
  1592. // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
  1593. // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
  1594. Value *NewLHS = ConstOp1 ? V1 : NewC;
  1595. Value *NewRHS = ConstOp1 ? NewC : V1;
  1596. return createBinOpShuffle(NewLHS, NewRHS, Mask);
  1597. }
  1598. }
  1599. // Try to reassociate to sink a splat shuffle after a binary operation.
  1600. if (Inst.isAssociative() && Inst.isCommutative()) {
  1601. // Canonicalize shuffle operand as LHS.
  1602. if (isa<ShuffleVectorInst>(RHS))
  1603. std::swap(LHS, RHS);
  1604. Value *X;
  1605. ArrayRef<int> MaskC;
  1606. int SplatIndex;
  1607. Value *Y, *OtherOp;
  1608. if (!match(LHS,
  1609. m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
  1610. !match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
  1611. X->getType() != Inst.getType() ||
  1612. !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
  1613. return nullptr;
  1614. // FIXME: This may not be safe if the analysis allows undef elements. By
  1615. // moving 'Y' before the splat shuffle, we are implicitly assuming
  1616. // that it is not undef/poison at the splat index.
  1617. if (isSplatValue(OtherOp, SplatIndex)) {
  1618. std::swap(Y, OtherOp);
  1619. } else if (!isSplatValue(Y, SplatIndex)) {
  1620. return nullptr;
  1621. }
  1622. // X and Y are splatted values, so perform the binary operation on those
  1623. // values followed by a splat followed by the 2nd binary operation:
  1624. // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
  1625. Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
  1626. SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
  1627. Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
  1628. Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
  1629. // Intersect FMF on both new binops. Other (poison-generating) flags are
  1630. // dropped to be safe.
  1631. if (isa<FPMathOperator>(R)) {
  1632. R->copyFastMathFlags(&Inst);
  1633. R->andIRFlags(RHS);
  1634. }
  1635. if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
  1636. NewInstBO->copyIRFlags(R);
  1637. return R;
  1638. }
  1639. return nullptr;
  1640. }
1641. /// Try to narrow the width of a binop if at least 1 operand is an extend
1642. /// of a value. This requires a potentially expensive known bits check to make
  1643. /// sure the narrow op does not overflow.
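/// Illustrative example: add (zext i8 %x to i32), (zext i8 %y to i32)
/// --> zext (add nuw i8 %x, %y) to i32, when known bits prove the narrow add
/// cannot overflow.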
  1644. Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
  1645. // We need at least one extended operand.
  1646. Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
  1647. // If this is a sub, we swap the operands since we always want an extension
  1648. // on the RHS. The LHS can be an extension or a constant.
  1649. if (BO.getOpcode() == Instruction::Sub)
  1650. std::swap(Op0, Op1);
  1651. Value *X;
  1652. bool IsSext = match(Op0, m_SExt(m_Value(X)));
  1653. if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
  1654. return nullptr;
  1655. // If both operands are the same extension from the same source type and we
  1656. // can eliminate at least one (hasOneUse), this might work.
  1657. CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
  1658. Value *Y;
  1659. if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
  1660. cast<Operator>(Op1)->getOpcode() == CastOpc &&
  1661. (Op0->hasOneUse() || Op1->hasOneUse()))) {
  1662. // If that did not match, see if we have a suitable constant operand.
  1663. // Truncating and extending must produce the same constant.
  1664. Constant *WideC;
  1665. if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
  1666. return nullptr;
  1667. Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
  1668. if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
  1669. return nullptr;
  1670. Y = NarrowC;
  1671. }
  1672. // Swap back now that we found our operands.
  1673. if (BO.getOpcode() == Instruction::Sub)
  1674. std::swap(X, Y);
  1675. // Both operands have narrow versions. Last step: the math must not overflow
  1676. // in the narrow width.
  1677. if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
  1678. return nullptr;
  1679. // bo (ext X), (ext Y) --> ext (bo X, Y)
  1680. // bo (ext X), C --> ext (bo X, C')
  1681. Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
  1682. if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
  1683. if (IsSext)
  1684. NewBinOp->setHasNoSignedWrap();
  1685. else
  1686. NewBinOp->setHasNoUnsignedWrap();
  1687. }
  1688. return CastInst::Create(CastOpc, NarrowBO, BO.getType());
  1689. }
  1690. static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) {
  1691. // At least one GEP must be inbounds.
  1692. if (!GEP1.isInBounds() && !GEP2.isInBounds())
  1693. return false;
  1694. return (GEP1.isInBounds() || GEP1.hasAllZeroIndices()) &&
  1695. (GEP2.isInBounds() || GEP2.hasAllZeroIndices());
  1696. }
  1697. /// Thread a GEP operation with constant indices through the constant true/false
  1698. /// arms of a select.
  1699. static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
  1700. InstCombiner::BuilderTy &Builder) {
  1701. if (!GEP.hasAllConstantIndices())
  1702. return nullptr;
  1703. Instruction *Sel;
  1704. Value *Cond;
  1705. Constant *TrueC, *FalseC;
  1706. if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
  1707. !match(Sel,
  1708. m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
  1709. return nullptr;
  1710. // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
  1711. // Propagate 'inbounds' and metadata from existing instructions.
  1712. // Note: using IRBuilder to create the constants for efficiency.
  1713. SmallVector<Value *, 4> IndexC(GEP.indices());
  1714. bool IsInBounds = GEP.isInBounds();
  1715. Type *Ty = GEP.getSourceElementType();
  1716. Value *NewTrueC = IsInBounds ? Builder.CreateInBoundsGEP(Ty, TrueC, IndexC)
  1717. : Builder.CreateGEP(Ty, TrueC, IndexC);
  1718. Value *NewFalseC = IsInBounds ? Builder.CreateInBoundsGEP(Ty, FalseC, IndexC)
  1719. : Builder.CreateGEP(Ty, FalseC, IndexC);
  1720. return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
  1721. }
  1722. Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
  1723. GEPOperator *Src) {
  1724. // Combine Indices - If the source pointer to this getelementptr instruction
  1725. // is a getelementptr instruction with matching element type, combine the
  1726. // indices of the two getelementptr instructions into a single instruction.
  1727. if (Src->getResultElementType() != GEP.getSourceElementType())
  1728. return nullptr;
  1729. if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
  1730. return nullptr;
  1731. if (Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
  1732. Src->hasOneUse()) {
  1733. Value *GO1 = GEP.getOperand(1);
  1734. Value *SO1 = Src->getOperand(1);
  1735. if (LI) {
  1736. // Try to reassociate loop invariant GEP chains to enable LICM.
  1737. if (Loop *L = LI->getLoopFor(GEP.getParent())) {
  1738. // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
  1739. // invariant: this breaks the dependence between GEPs and allows LICM
  1740. // to hoist the invariant part out of the loop.
  1741. if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
  1742. // We have to be careful here.
  1743. // We have something like:
  1744. // %src = getelementptr <ty>, <ty>* %base, <ty> %idx
  1745. // %gep = getelementptr <ty>, <ty>* %src, <ty> %idx2
1746. // If we just swap idx & idx2 then we could inadvertently
  1747. // change %src from a vector to a scalar, or vice versa.
  1748. // Cases:
  1749. // 1) %base a scalar & idx a scalar & idx2 a vector
  1750. // => Swapping idx & idx2 turns %src into a vector type.
  1751. // 2) %base a scalar & idx a vector & idx2 a scalar
1752. // => Swapping idx & idx2 turns %src into a scalar type
  1753. // 3) %base, %idx, and %idx2 are scalars
  1754. // => %src & %gep are scalars
  1755. // => swapping idx & idx2 is safe
  1756. // 4) %base a vector
  1757. // => %src is a vector
  1758. // => swapping idx & idx2 is safe.
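// Illustrative example (cases 3/4): inside a loop where %inv is invariant and
// %idx is not, '%src = gep %base, %idx; %gep = gep %src, %inv' is rewritten to
// '%src = gep %base, %inv; %gep = gep %src, %idx', so LICM can hoist %src.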
  1759. auto *SO0 = Src->getOperand(0);
  1760. auto *SO0Ty = SO0->getType();
  1761. if (!isa<VectorType>(GEP.getType()) || // case 3
  1762. isa<VectorType>(SO0Ty)) { // case 4
  1763. Src->setOperand(1, GO1);
  1764. GEP.setOperand(1, SO1);
  1765. return &GEP;
  1766. } else {
  1767. // Case 1 or 2
  1768. // -- have to recreate %src & %gep
  1769. // put NewSrc at same location as %src
  1770. Builder.SetInsertPoint(cast<Instruction>(Src));
  1771. Value *NewSrc = Builder.CreateGEP(
  1772. GEP.getSourceElementType(), SO0, GO1, Src->getName());
  1773. // Propagate 'inbounds' if the new source was not constant-folded.
  1774. if (auto *NewSrcGEPI = dyn_cast<GetElementPtrInst>(NewSrc))
  1775. NewSrcGEPI->setIsInBounds(Src->isInBounds());
  1776. GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
  1777. GEP.getSourceElementType(), NewSrc, {SO1});
  1778. NewGEP->setIsInBounds(GEP.isInBounds());
  1779. return NewGEP;
  1780. }
  1781. }
  1782. }
  1783. }
  1784. }
  1785. // Note that if our source is a gep chain itself then we wait for that
  1786. // chain to be resolved before we perform this transformation. This
  1787. // avoids us creating a TON of code in some cases.
  1788. if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
  1789. if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
  1790. return nullptr; // Wait until our source is folded to completion.
  1791. SmallVector<Value*, 8> Indices;
  1792. // Find out whether the last index in the source GEP is a sequential idx.
  1793. bool EndsWithSequential = false;
  1794. for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
  1795. I != E; ++I)
  1796. EndsWithSequential = I.isSequential();
  1797. // Can we combine the two pointer arithmetics offsets?
  1798. if (EndsWithSequential) {
  1799. // Replace: gep (gep %P, long B), long A, ...
  1800. // With: T = long A+B; gep %P, T, ...
  1801. Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
  1802. Value *GO1 = GEP.getOperand(1);
  1803. // If they aren't the same type, then the input hasn't been processed
  1804. // by the loop above yet (which canonicalizes sequential index types to
  1805. // intptr_t). Just avoid transforming this until the input has been
  1806. // normalized.
  1807. if (SO1->getType() != GO1->getType())
  1808. return nullptr;
  1809. Value *Sum =
  1810. SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
  1811. // Only do the combine when we are sure the cost after the
  1812. // merge is never more than that before the merge.
  1813. if (Sum == nullptr)
  1814. return nullptr;
  1815. // Update the GEP in place if possible.
  1816. if (Src->getNumOperands() == 2) {
  1817. GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
  1818. replaceOperand(GEP, 0, Src->getOperand(0));
  1819. replaceOperand(GEP, 1, Sum);
  1820. return &GEP;
  1821. }
  1822. Indices.append(Src->op_begin()+1, Src->op_end()-1);
  1823. Indices.push_back(Sum);
  1824. Indices.append(GEP.op_begin()+2, GEP.op_end());
  1825. } else if (isa<Constant>(*GEP.idx_begin()) &&
  1826. cast<Constant>(*GEP.idx_begin())->isNullValue() &&
  1827. Src->getNumOperands() != 1) {
  1828. // Otherwise we can do the fold if the first index of the GEP is a zero
  1829. Indices.append(Src->op_begin()+1, Src->op_end());
  1830. Indices.append(GEP.idx_begin()+1, GEP.idx_end());
  1831. }
  1832. if (!Indices.empty())
  1833. return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
  1834. ? GetElementPtrInst::CreateInBounds(
  1835. Src->getSourceElementType(), Src->getOperand(0), Indices,
  1836. GEP.getName())
  1837. : GetElementPtrInst::Create(Src->getSourceElementType(),
  1838. Src->getOperand(0), Indices,
  1839. GEP.getName());
  1840. return nullptr;
  1841. }
  1842. // Note that we may have also stripped an address space cast in between.
  1843. Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
  1844. GetElementPtrInst &GEP) {
  1845. // With opaque pointers, there is no pointer element type we can use to
  1846. // adjust the GEP type.
  1847. PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
  1848. if (SrcType->isOpaque())
  1849. return nullptr;
  1850. Type *GEPEltType = GEP.getSourceElementType();
  1851. Type *SrcEltType = SrcType->getNonOpaquePointerElementType();
  1852. Value *SrcOp = BCI->getOperand(0);
  1853. // GEP directly using the source operand if this GEP is accessing an element
  1854. // of a bitcasted pointer to vector or array of the same dimensions:
  1855. // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
  1856. // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
  1857. auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
  1858. const DataLayout &DL) {
  1859. auto *VecVTy = cast<FixedVectorType>(VecTy);
  1860. return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
  1861. ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
  1862. DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
  1863. };
  1864. if (GEP.getNumOperands() == 3 &&
  1865. ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
  1866. areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
  1867. (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
  1868. areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
  1869. // Create a new GEP here, as using `setOperand()` followed by
  1870. // `setSourceElementType()` won't actually update the type of the
1871. // existing GEP Value, which causes issues if this Value is accessed when
1872. // constructing an AddrSpaceCastInst.
  1873. SmallVector<Value *, 8> Indices(GEP.indices());
  1874. Value *NGEP = GEP.isInBounds()
  1875. ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, Indices)
  1876. : Builder.CreateGEP(SrcEltType, SrcOp, Indices);
  1877. NGEP->takeName(&GEP);
  1878. // Preserve GEP address space to satisfy users
  1879. if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
  1880. return new AddrSpaceCastInst(NGEP, GEP.getType());
  1881. return replaceInstUsesWith(GEP, NGEP);
  1882. }
  1883. // See if we can simplify:
  1884. // X = bitcast A* to B*
  1885. // Y = gep X, <...constant indices...>
  1886. // into a gep of the original struct. This is important for SROA and alias
  1887. // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
  1888. unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEP.getType());
  1889. APInt Offset(OffsetBits, 0);
1890. // If the bitcast argument is an allocation, the bitcast is for conversion
1891. // to the actual type of the allocation. Removing such bitcasts results in
1892. // GEPs with an i8* base and pure byte offsets. That means the GEP is not
1893. // aware of the struct or array hierarchy.
  1894. // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
  1895. // a better chance to succeed.
  1896. if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
  1897. !isAllocationFn(SrcOp, &TLI)) {
  1898. // If this GEP instruction doesn't move the pointer, just replace the GEP
  1899. // with a bitcast of the real input to the dest type.
  1900. if (!Offset) {
  1901. // If the bitcast is of an allocation, and the allocation will be
  1902. // converted to match the type of the cast, don't touch this.
  1903. if (isa<AllocaInst>(SrcOp)) {
1904. // See if the bitcast simplifies; if so, don't nuke this GEP yet.
  1905. if (Instruction *I = visitBitCast(*BCI)) {
  1906. if (I != BCI) {
  1907. I->takeName(BCI);
  1908. BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
  1909. replaceInstUsesWith(*BCI, I);
  1910. }
  1911. return &GEP;
  1912. }
  1913. }
  1914. if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
  1915. return new AddrSpaceCastInst(SrcOp, GEP.getType());
  1916. return new BitCastInst(SrcOp, GEP.getType());
  1917. }
  1918. // Otherwise, if the offset is non-zero, we need to find out if there is a
  1919. // field at Offset in 'A's type. If so, we can pull the cast through the
  1920. // GEP.
  1921. SmallVector<Value*, 8> NewIndices;
  1922. if (findElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices, DL)) {
  1923. Value *NGEP =
  1924. GEP.isInBounds()
  1925. ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
  1926. : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
  1927. if (NGEP->getType() == GEP.getType())
  1928. return replaceInstUsesWith(GEP, NGEP);
  1929. NGEP->takeName(&GEP);
  1930. if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
  1931. return new AddrSpaceCastInst(NGEP, GEP.getType());
  1932. return new BitCastInst(NGEP, GEP.getType());
  1933. }
  1934. }
  1935. return nullptr;
  1936. }
  1937. Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  1938. Value *PtrOp = GEP.getOperand(0);
  1939. SmallVector<Value *, 8> Indices(GEP.indices());
  1940. Type *GEPType = GEP.getType();
  1941. Type *GEPEltType = GEP.getSourceElementType();
  1942. bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
  1943. if (Value *V = SimplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
  1944. SQ.getWithInstruction(&GEP)))
  1945. return replaceInstUsesWith(GEP, V);
  1946. // For vector geps, use the generic demanded vector support.
  1947. // Skip if GEP return type is scalable. The number of elements is unknown at
  1948. // compile-time.
  1949. if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
  1950. auto VWidth = GEPFVTy->getNumElements();
  1951. APInt UndefElts(VWidth, 0);
  1952. APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
  1953. if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
  1954. UndefElts)) {
  1955. if (V != &GEP)
  1956. return replaceInstUsesWith(GEP, V);
  1957. return &GEP;
  1958. }
  1959. // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
  1960. // possible (decide on canonical form for pointer broadcast), 3) exploit
  1961. // undef elements to decrease demanded bits
  1962. }
  1963. // Eliminate unneeded casts for indices, and replace indices which displace
  1964. // by multiples of a zero size type with zero.
  1965. bool MadeChange = false;
  1966. // Index width may not be the same width as pointer width.
  1967. // Data layout chooses the right type based on supported integer types.
  1968. Type *NewScalarIndexTy =
  1969. DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
  1970. gep_type_iterator GTI = gep_type_begin(GEP);
  1971. for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
  1972. ++I, ++GTI) {
  1973. // Skip indices into struct types.
  1974. if (GTI.isStruct())
  1975. continue;
  1976. Type *IndexTy = (*I)->getType();
  1977. Type *NewIndexType =
  1978. IndexTy->isVectorTy()
  1979. ? VectorType::get(NewScalarIndexTy,
  1980. cast<VectorType>(IndexTy)->getElementCount())
  1981. : NewScalarIndexTy;
  1982. // If the element type has zero size then any index over it is equivalent
  1983. // to an index of zero, so replace it with zero if it is not zero already.
  1984. Type *EltTy = GTI.getIndexedType();
  1985. if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
  1986. if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
  1987. *I = Constant::getNullValue(NewIndexType);
  1988. MadeChange = true;
  1989. }
  1990. if (IndexTy != NewIndexType) {
  1991. // If we are using a wider index than needed for this platform, shrink
  1992. // it to what we need. If narrower, sign-extend it to what we need.
  1993. // This explicit cast can make subsequent optimizations more obvious.
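// e.g. (illustrative): on a target whose pointer index type is i64, an i32
// index is sign-extended to i64 here, and an i128 index is truncated to i64.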
  1994. *I = Builder.CreateIntCast(*I, NewIndexType, true);
  1995. MadeChange = true;
  1996. }
  1997. }
  1998. if (MadeChange)
  1999. return &GEP;
  2000. // Check to see if the inputs to the PHI node are getelementptr instructions.
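// Illustrative example of the transform below: if %p = phi [ %g1, %bb1 ], [ %g2, %bb2 ]
// with %g1 = gep [4 x i32], %base, 0, %i and %g2 = gep [4 x i32], %base, 0, %j
// (differing only in the last index), a new %k = phi [ %i, %bb1 ], [ %j, %bb2 ]
// is created and a single 'gep [4 x i32], %base, 0, %k' is cloned into this block.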
  2001. if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
  2002. auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
  2003. if (!Op1)
  2004. return nullptr;
  2005. // Don't fold a GEP into itself through a PHI node. This can only happen
  2006. // through the back-edge of a loop. Folding a GEP into itself means that
  2007. // the value of the previous iteration needs to be stored in the meantime,
  2008. // thus requiring an additional register variable to be live, but not
  2009. // actually achieving anything (the GEP still needs to be executed once per
  2010. // loop iteration).
  2011. if (Op1 == &GEP)
  2012. return nullptr;
  2013. int DI = -1;
  2014. for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
  2015. auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
  2016. if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
  2017. return nullptr;
  2018. // As for Op1 above, don't try to fold a GEP into itself.
  2019. if (Op2 == &GEP)
  2020. return nullptr;
  2021. // Keep track of the type as we walk the GEP.
  2022. Type *CurTy = nullptr;
  2023. for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
  2024. if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
  2025. return nullptr;
  2026. if (Op1->getOperand(J) != Op2->getOperand(J)) {
  2027. if (DI == -1) {
2028. // We have not seen any differences in the GEPs feeding the
2029. // PHI yet, so record this one if it is allowed to be a
2030. // variable.
2031. // The first two arguments can vary for any GEP; the rest have
2032. // to be static for struct slots.
  2033. if (J > 1) {
  2034. assert(CurTy && "No current type?");
  2035. if (CurTy->isStructTy())
  2036. return nullptr;
  2037. }
  2038. DI = J;
  2039. } else {
2040. // The GEPs differ in more than one operand. While this could be
2041. // extended to support GEPs that vary in more than one variable, it
2042. // doesn't make sense: it greatly increases the complexity and would
2043. // result in an R+R+R addressing mode, which no backend directly
2044. // supports and which would need to be broken into several simpler
2045. // instructions anyway.
  2046. return nullptr;
  2047. }
  2048. }
  2049. // Sink down a layer of the type for the next iteration.
  2050. if (J > 0) {
  2051. if (J == 1) {
  2052. CurTy = Op1->getSourceElementType();
  2053. } else {
  2054. CurTy =
  2055. GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
  2056. }
  2057. }
  2058. }
  2059. }
  2060. // If not all GEPs are identical we'll have to create a new PHI node.
  2061. // Check that the old PHI node has only one use so that it will get
  2062. // removed.
  2063. if (DI != -1 && !PN->hasOneUse())
  2064. return nullptr;
  2065. auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
  2066. if (DI == -1) {
  2067. // All the GEPs feeding the PHI are identical. Clone one down into our
  2068. // BB so that it can be merged with the current GEP.
  2069. } else {
  2070. // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
  2071. // into the current block so it can be merged, and create a new PHI to
  2072. // set that index.
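// For example (illustrative):
//   %g1 = getelementptr i32, i32* %base, i64 %i      ; in %pred1
//   %g2 = getelementptr i32, i32* %base, i64 %j      ; in %pred2
//   %p  = phi i32* [ %g1, %pred1 ], [ %g2, %pred2 ]
//   %g  = getelementptr i32, i32* %p, i64 1
// roughly becomes (once the old PHI and GEPs are cleaned up)
//   %idx = phi i64 [ %i, %pred1 ], [ %j, %pred2 ]
//   %ng  = getelementptr i32, i32* %base, i64 %idx
//   %g   = getelementptr i32, i32* %ng, i64 1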
  2073. PHINode *NewPN;
  2074. {
  2075. IRBuilderBase::InsertPointGuard Guard(Builder);
  2076. Builder.SetInsertPoint(PN);
  2077. NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
  2078. PN->getNumOperands());
  2079. }
  2080. for (auto &I : PN->operands())
  2081. NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
  2082. PN->getIncomingBlock(I));
  2083. NewGEP->setOperand(DI, NewPN);
  2084. }
  2085. GEP.getParent()->getInstList().insert(
  2086. GEP.getParent()->getFirstInsertionPt(), NewGEP);
  2087. replaceOperand(GEP, 0, NewGEP);
  2088. PtrOp = NewGEP;
  2089. }
  2090. if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
  2091. if (Instruction *I = visitGEPOfGEP(GEP, Src))
  2092. return I;
  2093. // Skip if GEP source element type is scalable. The type alloc size is unknown
  2094. // at compile-time.
  2095. if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
  2096. unsigned AS = GEP.getPointerAddressSpace();
  2097. if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
  2098. DL.getIndexSizeInBits(AS)) {
  2099. uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
  2100. bool Matched = false;
  2101. uint64_t C;
  2102. Value *V = nullptr;
  2103. if (TyAllocSize == 1) {
  2104. V = GEP.getOperand(1);
  2105. Matched = true;
  2106. } else if (match(GEP.getOperand(1),
  2107. m_AShr(m_Value(V), m_ConstantInt(C)))) {
  2108. if (TyAllocSize == 1ULL << C)
  2109. Matched = true;
  2110. } else if (match(GEP.getOperand(1),
  2111. m_SDiv(m_Value(V), m_ConstantInt(C)))) {
  2112. if (TyAllocSize == C)
  2113. Matched = true;
  2114. }
  2115. // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but
  2116. // only if both point to the same underlying object (otherwise provenance
  2117. // is not necessarily retained).
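// For example (illustrative):
//   %xi = ptrtoint i8* %x to i64
//   %yi = ptrtoint i8* %y to i64
//   %d  = sub i64 %yi, %xi
//   %g  = getelementptr i8, i8* %x, i64 %d
// becomes a bitcast of %y when %x and %y share the same underlying object.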
  2118. Value *Y;
  2119. Value *X = GEP.getOperand(0);
  2120. if (Matched &&
  2121. match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) &&
  2122. getUnderlyingObject(X) == getUnderlyingObject(Y))
  2123. return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
  2124. }
  2125. }
  2126. // We do not handle pointer-vector geps here.
  2127. if (GEPType->isVectorTy())
  2128. return nullptr;
  2129. // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
  2130. Value *StrippedPtr = PtrOp->stripPointerCasts();
  2131. PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
  2132. // TODO: The basic approach of these folds is not compatible with opaque
  2133. // pointers, because we can't use bitcasts as a hint for a desirable GEP
  2134. // type. Instead, we should perform canonicalization directly on the GEP
  2135. // type. For now, skip these.
  2136. if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) {
  2137. bool HasZeroPointerIndex = false;
  2138. Type *StrippedPtrEltTy = StrippedPtrTy->getNonOpaquePointerElementType();
  2139. if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
  2140. HasZeroPointerIndex = C->isZero();
  2141. // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
  2142. // into : GEP [10 x i8]* X, i32 0, ...
  2143. //
  2144. // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
  2145. // into : GEP i8* X, ...
  2146. //
  2147. // This occurs when the program declares an array extern like "int X[];"
  2148. if (HasZeroPointerIndex) {
  2149. if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
  2150. // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
  2151. if (CATy->getElementType() == StrippedPtrEltTy) {
  2152. // -> GEP i8* X, ...
  2153. SmallVector<Value *, 8> Idx(drop_begin(GEP.indices()));
  2154. GetElementPtrInst *Res = GetElementPtrInst::Create(
  2155. StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
  2156. Res->setIsInBounds(GEP.isInBounds());
  2157. if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
  2158. return Res;
  2159. // Insert Res, and create an addrspacecast.
  2160. // e.g.,
  2161. // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
  2162. // ->
  2163. // %0 = GEP i8 addrspace(1)* X, ...
  2164. // addrspacecast i8 addrspace(1)* %0 to i8*
  2165. return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
  2166. }
  2167. if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
  2168. // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
  2169. if (CATy->getElementType() == XATy->getElementType()) {
  2170. // -> GEP [10 x i8]* X, i32 0, ...
  2171. // At this point, we know that the cast source type is a pointer
  2172. // to an array of the same type as the destination pointer
  2173. // array. Because the array type is never stepped over (there
  2174. // is a leading zero) we can fold the cast into this GEP.
  2175. if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
  2176. GEP.setSourceElementType(XATy);
  2177. return replaceOperand(GEP, 0, StrippedPtr);
  2178. }
  2179. // Cannot replace the base pointer directly because StrippedPtr's
  2180. // address space is different. Instead, create a new GEP followed by
  2181. // an addrspacecast.
  2182. // e.g.,
  2183. // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
  2184. // i32 0, ...
  2185. // ->
  2186. // %0 = GEP [10 x i8] addrspace(1)* X, ...
  2187. // addrspacecast i8 addrspace(1)* %0 to i8*
  2188. SmallVector<Value *, 8> Idx(GEP.indices());
  2189. Value *NewGEP =
  2190. GEP.isInBounds()
  2191. ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
  2192. Idx, GEP.getName())
  2193. : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
  2194. GEP.getName());
  2195. return new AddrSpaceCastInst(NewGEP, GEPType);
  2196. }
  2197. }
  2198. }
  2199. } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
  2200. // Skip if GEP source element type is scalable. The type alloc size is
  2201. // unknown at compile-time.
2202. // Transform things like:
2203. //   %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
2204. // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
  2205. if (StrippedPtrEltTy->isArrayTy() &&
  2206. DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
  2207. DL.getTypeAllocSize(GEPEltType)) {
  2208. Type *IdxType = DL.getIndexType(GEPType);
  2209. Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
  2210. Value *NewGEP =
  2211. GEP.isInBounds()
  2212. ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, Idx,
  2213. GEP.getName())
  2214. : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
  2215. GEP.getName());
  2216. // V and GEP are both pointer types --> BitCast
  2217. return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
  2218. }
  2219. // Transform things like:
  2220. // %V = mul i64 %N, 4
  2221. // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
  2222. // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
  2223. if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
  2224. // Check that changing the type amounts to dividing the index by a scale
  2225. // factor.
  2226. uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
  2227. uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy).getFixedSize();
  2228. if (ResSize && SrcSize % ResSize == 0) {
  2229. Value *Idx = GEP.getOperand(1);
  2230. unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
  2231. uint64_t Scale = SrcSize / ResSize;
  2232. // Earlier transforms ensure that the index has the right type
  2233. // according to Data Layout, which considerably simplifies the
  2234. // logic by eliminating implicit casts.
  2235. assert(Idx->getType() == DL.getIndexType(GEPType) &&
  2236. "Index type does not match the Data Layout preferences");
  2237. bool NSW;
  2238. if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
  2239. // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
  2240. // If the multiplication NewIdx * Scale may overflow then the new
  2241. // GEP may not be "inbounds".
  2242. Value *NewGEP =
  2243. GEP.isInBounds() && NSW
  2244. ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
  2245. NewIdx, GEP.getName())
  2246. : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
  2247. GEP.getName());
  2248. // The NewGEP must be pointer typed, so must the old one -> BitCast
  2249. return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
  2250. GEPType);
  2251. }
  2252. }
  2253. }
  2254. // Similarly, transform things like:
  2255. // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
  2256. // (where tmp = 8*tmp2) into:
  2257. // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
  2258. if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
  2259. StrippedPtrEltTy->isArrayTy()) {
  2260. // Check that changing to the array element type amounts to dividing the
  2261. // index by a scale factor.
  2262. uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
  2263. uint64_t ArrayEltSize =
  2264. DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
  2265. .getFixedSize();
  2266. if (ResSize && ArrayEltSize % ResSize == 0) {
  2267. Value *Idx = GEP.getOperand(1);
  2268. unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
  2269. uint64_t Scale = ArrayEltSize / ResSize;
  2270. // Earlier transforms ensure that the index has the right type
  2271. // according to the Data Layout, which considerably simplifies
  2272. // the logic by eliminating implicit casts.
  2273. assert(Idx->getType() == DL.getIndexType(GEPType) &&
  2274. "Index type does not match the Data Layout preferences");
  2275. bool NSW;
  2276. if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
  2277. // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
  2278. // If the multiplication NewIdx * Scale may overflow then the new
  2279. // GEP may not be "inbounds".
  2280. Type *IndTy = DL.getIndexType(GEPType);
  2281. Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
  2282. Value *NewGEP =
  2283. GEP.isInBounds() && NSW
  2284. ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
  2285. Off, GEP.getName())
  2286. : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
  2287. GEP.getName());
  2288. // The NewGEP must be pointer typed, so must the old one -> BitCast
  2289. return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
  2290. GEPType);
  2291. }
  2292. }
  2293. }
  2294. }
  2295. }
  2296. // addrspacecast between types is canonicalized as a bitcast, then an
  2297. // addrspacecast. To take advantage of the below bitcast + struct GEP, look
  2298. // through the addrspacecast.
  2299. Value *ASCStrippedPtrOp = PtrOp;
  2300. if (auto *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
  2301. // X = bitcast A addrspace(1)* to B addrspace(1)*
  2302. // Y = addrspacecast A addrspace(1)* to B addrspace(2)*
  2303. // Z = gep Y, <...constant indices...>
  2304. // Into an addrspacecasted GEP of the struct.
  2305. if (auto *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
  2306. ASCStrippedPtrOp = BC;
  2307. }
  2308. if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp))
  2309. if (Instruction *I = visitGEPOfBitcast(BCI, GEP))
  2310. return I;
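// If a GEP of an alloca is not marked inbounds but its constant offset is
// provably non-negative and within the allocation's size, it can safely be
// rewritten as an inbounds GEP.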
  2311. if (!GEP.isInBounds()) {
  2312. unsigned IdxWidth =
  2313. DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
  2314. APInt BasePtrOffset(IdxWidth, 0);
  2315. Value *UnderlyingPtrOp =
  2316. PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
  2317. BasePtrOffset);
  2318. if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
  2319. if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
  2320. BasePtrOffset.isNonNegative()) {
  2321. APInt AllocSize(
  2322. IdxWidth,
  2323. DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize());
  2324. if (BasePtrOffset.ule(AllocSize)) {
  2325. return GetElementPtrInst::CreateInBounds(
  2326. GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
  2327. }
  2328. }
  2329. }
  2330. }
  2331. if (Instruction *R = foldSelectGEP(GEP, Builder))
  2332. return R;
  2333. return nullptr;
  2334. }
  2335. static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
  2336. Instruction *AI) {
  2337. if (isa<ConstantPointerNull>(V))
  2338. return true;
  2339. if (auto *LI = dyn_cast<LoadInst>(V))
  2340. return isa<GlobalVariable>(LI->getPointerOperand());
  2341. // Two distinct allocations will never be equal.
  2342. return isAllocLikeFn(V, &TLI) && V != AI;
  2343. }
2344. /// Given a call CB which uses an address UsedV, return true if we can prove
2345. /// the call's only possible effect is storing to UsedV.
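/// For example (illustrative), a nounwind, willreturn call that is known to
/// only write through its destination argument (such as a memset-style
/// libcall) qualifies when its result is unused and its destination is UsedV.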
  2346. static bool isRemovableWrite(CallBase &CB, Value *UsedV,
  2347. const TargetLibraryInfo &TLI) {
  2348. if (!CB.use_empty())
  2349. // TODO: add recursion if returned attribute is present
  2350. return false;
  2351. if (CB.isTerminator())
  2352. // TODO: remove implementation restriction
  2353. return false;
  2354. if (!CB.willReturn() || !CB.doesNotThrow())
  2355. return false;
  2356. // If the only possible side effect of the call is writing to the alloca,
  2357. // and the result isn't used, we can safely remove any reads implied by the
  2358. // call including those which might read the alloca itself.
  2359. Optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
  2360. return Dest && Dest->Ptr == UsedV;
  2361. }
  2362. static bool isAllocSiteRemovable(Instruction *AI,
  2363. SmallVectorImpl<WeakTrackingVH> &Users,
  2364. const TargetLibraryInfo &TLI) {
  2365. SmallVector<Instruction*, 4> Worklist;
  2366. Worklist.push_back(AI);
  2367. do {
  2368. Instruction *PI = Worklist.pop_back_val();
  2369. for (User *U : PI->users()) {
  2370. Instruction *I = cast<Instruction>(U);
  2371. switch (I->getOpcode()) {
  2372. default:
  2373. // Give up the moment we see something we can't handle.
  2374. return false;
  2375. case Instruction::AddrSpaceCast:
  2376. case Instruction::BitCast:
  2377. case Instruction::GetElementPtr:
  2378. Users.emplace_back(I);
  2379. Worklist.push_back(I);
  2380. continue;
  2381. case Instruction::ICmp: {
  2382. ICmpInst *ICI = cast<ICmpInst>(I);
  2383. // We can fold eq/ne comparisons with null to false/true, respectively.
2384. // We also fold comparisons under some conditions provided the alloc has
  2385. // not escaped (see isNeverEqualToUnescapedAlloc).
  2386. if (!ICI->isEquality())
  2387. return false;
  2388. unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
  2389. if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
  2390. return false;
  2391. Users.emplace_back(I);
  2392. continue;
  2393. }
  2394. case Instruction::Call:
  2395. // Ignore no-op and store intrinsics.
  2396. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
  2397. switch (II->getIntrinsicID()) {
  2398. default:
  2399. return false;
  2400. case Intrinsic::memmove:
  2401. case Intrinsic::memcpy:
  2402. case Intrinsic::memset: {
  2403. MemIntrinsic *MI = cast<MemIntrinsic>(II);
  2404. if (MI->isVolatile() || MI->getRawDest() != PI)
  2405. return false;
  2406. LLVM_FALLTHROUGH;
  2407. }
  2408. case Intrinsic::assume:
  2409. case Intrinsic::invariant_start:
  2410. case Intrinsic::invariant_end:
  2411. case Intrinsic::lifetime_start:
  2412. case Intrinsic::lifetime_end:
  2413. case Intrinsic::objectsize:
  2414. Users.emplace_back(I);
  2415. continue;
  2416. case Intrinsic::launder_invariant_group:
  2417. case Intrinsic::strip_invariant_group:
  2418. Users.emplace_back(I);
  2419. Worklist.push_back(I);
  2420. continue;
  2421. }
  2422. }
  2423. if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
  2424. Users.emplace_back(I);
  2425. continue;
  2426. }
  2427. if (isFreeCall(I, &TLI)) {
  2428. Users.emplace_back(I);
  2429. continue;
  2430. }
  2431. if (isReallocLikeFn(I, &TLI)) {
  2432. Users.emplace_back(I);
  2433. Worklist.push_back(I);
  2434. continue;
  2435. }
  2436. return false;
  2437. case Instruction::Store: {
  2438. StoreInst *SI = cast<StoreInst>(I);
  2439. if (SI->isVolatile() || SI->getPointerOperand() != PI)
  2440. return false;
  2441. Users.emplace_back(I);
  2442. continue;
  2443. }
  2444. }
  2445. llvm_unreachable("missing a return?");
  2446. }
  2447. } while (!Worklist.empty());
  2448. return true;
  2449. }
  2450. Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
  2451. assert(isa<AllocaInst>(MI) || isAllocRemovable(&cast<CallBase>(MI), &TLI));
2452. // If we have a malloc call which is only used in comparisons to null and
2453. // free calls, delete the calls and replace the comparisons with true or
2454. // false as appropriate.
2455. // This is based on the principle that we can substitute our own allocation
2456. // function (which will never return null) rather than relying on knowledge
2457. // of the specific function being called. In some sense this can change the
2458. // permitted outputs of a program (when we convert a malloc to an alloca,
2459. // the fact that the allocation is now on the stack is potentially visible,
2460. // for example), but we believe it does so in a permissible manner.
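// For example (illustrative):
//   %p = call i8* @malloc(i64 16)
//   %c = icmp eq i8* %p, null
//   call void @free(i8* %p)
// Both calls are erased and %c is folded to false (the equivalent allocation
// we could substitute would never return null).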
  2461. SmallVector<WeakTrackingVH, 64> Users;
  2462. // If we are removing an alloca with a dbg.declare, insert dbg.value calls
  2463. // before each store.
  2464. SmallVector<DbgVariableIntrinsic *, 8> DVIs;
  2465. std::unique_ptr<DIBuilder> DIB;
  2466. if (isa<AllocaInst>(MI)) {
  2467. findDbgUsers(DVIs, &MI);
  2468. DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
  2469. }
  2470. if (isAllocSiteRemovable(&MI, Users, TLI)) {
  2471. for (unsigned i = 0, e = Users.size(); i != e; ++i) {
2472. // Lower all @llvm.objectsize calls first because they may
  2473. // use a bitcast/GEP of the alloca we are removing.
  2474. if (!Users[i])
  2475. continue;
  2476. Instruction *I = cast<Instruction>(&*Users[i]);
  2477. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
  2478. if (II->getIntrinsicID() == Intrinsic::objectsize) {
  2479. Value *Result =
  2480. lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true);
  2481. replaceInstUsesWith(*I, Result);
  2482. eraseInstFromFunction(*I);
  2483. Users[i] = nullptr; // Skip examining in the next loop.
  2484. }
  2485. }
  2486. }
  2487. for (unsigned i = 0, e = Users.size(); i != e; ++i) {
  2488. if (!Users[i])
  2489. continue;
  2490. Instruction *I = cast<Instruction>(&*Users[i]);
  2491. if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
  2492. replaceInstUsesWith(*C,
  2493. ConstantInt::get(Type::getInt1Ty(C->getContext()),
  2494. C->isFalseWhenEqual()));
  2495. } else if (auto *SI = dyn_cast<StoreInst>(I)) {
  2496. for (auto *DVI : DVIs)
  2497. if (DVI->isAddressOfVariable())
  2498. ConvertDebugDeclareToDebugValue(DVI, SI, *DIB);
  2499. } else {
  2500. // Casts, GEP, or anything else: we're about to delete this instruction,
2501. // so it cannot have any valid uses.
  2502. replaceInstUsesWith(*I, PoisonValue::get(I->getType()));
  2503. }
  2504. eraseInstFromFunction(*I);
  2505. }
  2506. if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) {
  2507. // Replace invoke with a NOP intrinsic to maintain the original CFG
  2508. Module *M = II->getModule();
  2509. Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
  2510. InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
  2511. None, "", II->getParent());
  2512. }
  2513. // Remove debug intrinsics which describe the value contained within the
  2514. // alloca. In addition to removing dbg.{declare,addr} which simply point to
  2515. // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
  2516. //
  2517. // ```
  2518. // define void @foo(i32 %0) {
  2519. // %a = alloca i32 ; Deleted.
  2520. // store i32 %0, i32* %a
  2521. // dbg.value(i32 %0, "arg0") ; Not deleted.
  2522. // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
  2523. // call void @trivially_inlinable_no_op(i32* %a)
  2524. // ret void
  2525. // }
  2526. // ```
  2527. //
  2528. // This may not be required if we stop describing the contents of allocas
  2529. // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
  2530. // the LowerDbgDeclare utility.
  2531. //
  2532. // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
  2533. // "arg0" dbg.value may be stale after the call. However, failing to remove
  2534. // the DW_OP_deref dbg.value causes large gaps in location coverage.
  2535. for (auto *DVI : DVIs)
  2536. if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref())
  2537. DVI->eraseFromParent();
  2538. return eraseInstFromFunction(MI);
  2539. }
  2540. return nullptr;
  2541. }
  2542. /// Move the call to free before a NULL test.
  2543. ///
2544. /// Check whether this call to free is reached only after its argument has
2545. /// been tested against NULL (property 0).
2546. /// If yes, it is legal to move this call into its predecessor block.
  2547. ///
  2548. /// The move is performed only if the block containing the call to free
  2549. /// will be removed, i.e.:
  2550. /// 1. it has only one predecessor P, and P has two successors
  2551. /// 2. it contains the call, noops, and an unconditional branch
  2552. /// 3. its successor is the same as its predecessor's successor
  2553. ///
2554. /// Profitability is not a concern here; this function should be
2555. /// called only if the caller knows this transformation would be
2556. /// profitable (e.g., for code size).
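/// For example (illustrative):
///   entry:
///     %c = icmp eq i8* %p, null
///     br i1 %c, label %exit, label %free.bb
///   free.bb:
///     call void @free(i8* %p)
///     br label %exit
/// The call to free is moved into %entry before the branch, after which
/// %free.bb becomes empty and can be removed by SimplifyCFG.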
  2557. static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
  2558. const DataLayout &DL) {
  2559. Value *Op = FI.getArgOperand(0);
  2560. BasicBlock *FreeInstrBB = FI.getParent();
  2561. BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
  2562. // Validate part of constraint #1: Only one predecessor
2563. // FIXME: We could allow more than one predecessor, but in that case, we
2564. // would duplicate the call to free in each predecessor and it may
2565. // not be profitable even for code size.
  2566. if (!PredBB)
  2567. return nullptr;
2568. // Validate constraint #2: Does this block contain only the call to
  2569. // free, noops, and an unconditional branch?
  2570. BasicBlock *SuccBB;
  2571. Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
  2572. if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
  2573. return nullptr;
2574. // If there are only 2 instructions in the block, then at this point
2575. // they are the call to free and the unconditional branch.
2576. // If there are more than 2 instructions, check that the others are noops,
2577. // i.e., they won't hurt the performance of the generated code.
  2578. if (FreeInstrBB->size() != 2) {
  2579. for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
  2580. if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
  2581. continue;
  2582. auto *Cast = dyn_cast<CastInst>(&Inst);
  2583. if (!Cast || !Cast->isNoopCast(DL))
  2584. return nullptr;
  2585. }
  2586. }
  2587. // Validate the rest of constraint #1 by matching on the pred branch.
  2588. Instruction *TI = PredBB->getTerminator();
  2589. BasicBlock *TrueBB, *FalseBB;
  2590. ICmpInst::Predicate Pred;
  2591. if (!match(TI, m_Br(m_ICmp(Pred,
  2592. m_CombineOr(m_Specific(Op),
  2593. m_Specific(Op->stripPointerCasts())),
  2594. m_Zero()),
  2595. TrueBB, FalseBB)))
  2596. return nullptr;
  2597. if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
  2598. return nullptr;
  2599. // Validate constraint #3: Ensure the null case just falls through.
  2600. if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
  2601. return nullptr;
  2602. assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
  2603. "Broken CFG: missing edge from predecessor to successor");
  2604. // At this point, we know that everything in FreeInstrBB can be moved
  2605. // before TI.
  2606. for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
  2607. if (&Instr == FreeInstrBBTerminator)
  2608. break;
  2609. Instr.moveBefore(TI);
  2610. }
  2611. assert(FreeInstrBB->size() == 1 &&
  2612. "Only the branch instruction should remain");
  2613. // Now that we've moved the call to free before the NULL check, we have to
  2614. // remove any attributes on its parameter that imply it's non-null, because
  2615. // those attributes might have only been valid because of the NULL check, and
  2616. // we can get miscompiles if we keep them. This is conservative if non-null is
  2617. // also implied by something other than the NULL check, but it's guaranteed to
  2618. // be correct, and the conservativeness won't matter in practice, since the
  2619. // attributes are irrelevant for the call to free itself and the pointer
  2620. // shouldn't be used after the call.
  2621. AttributeList Attrs = FI.getAttributes();
  2622. Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
  2623. Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
  2624. if (Dereferenceable.isValid()) {
  2625. uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
  2626. Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
  2627. Attribute::Dereferenceable);
  2628. Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
  2629. }
  2630. FI.setAttributes(Attrs);
  2631. return &FI;
  2632. }
  2633. Instruction *InstCombinerImpl::visitFree(CallInst &FI) {
  2634. Value *Op = FI.getArgOperand(0);
  2635. // free undef -> unreachable.
  2636. if (isa<UndefValue>(Op)) {
  2637. // Leave a marker since we can't modify the CFG here.
  2638. CreateNonTerminatorUnreachable(&FI);
  2639. return eraseInstFromFunction(FI);
  2640. }
2641. // If we have 'free null', delete the instruction. This can happen in STL
2642. // code when lots of inlining happens.
  2643. if (isa<ConstantPointerNull>(Op))
  2644. return eraseInstFromFunction(FI);
  2645. // If we had free(realloc(...)) with no intervening uses, then eliminate the
  2646. // realloc() entirely.
  2647. if (CallInst *CI = dyn_cast<CallInst>(Op)) {
  2648. if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI)) {
  2649. return eraseInstFromFunction(
  2650. *replaceInstUsesWith(*CI, CI->getOperand(0)));
  2651. }
  2652. }
2653. // If we optimize for code size, try to move the call to free before the null
2654. // test so that SimplifyCFG can remove the empty block and dead code
2655. // elimination can remove the branch. I.e., this helps to turn something like:
  2656. // if (foo) free(foo);
  2657. // into
  2658. // free(foo);
  2659. //
  2660. // Note that we can only do this for 'free' and not for any flavor of
  2661. // 'operator delete'; there is no 'operator delete' symbol for which we are
  2662. // permitted to invent a call, even if we're passing in a null pointer.
  2663. if (MinimizeSize) {
  2664. LibFunc Func;
  2665. if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
  2666. if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
  2667. return I;
  2668. }
  2669. return nullptr;
  2670. }
  2671. static bool isMustTailCall(Value *V) {
  2672. if (auto *CI = dyn_cast<CallInst>(V))
  2673. return CI->isMustTailCall();
  2674. return false;
  2675. }
  2676. Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
  2677. if (RI.getNumOperands() == 0) // ret void
  2678. return nullptr;
  2679. Value *ResultOp = RI.getOperand(0);
  2680. Type *VTy = ResultOp->getType();
  2681. if (!VTy->isIntegerTy() || isa<Constant>(ResultOp))
  2682. return nullptr;
  2683. // Don't replace result of musttail calls.
  2684. if (isMustTailCall(ResultOp))
  2685. return nullptr;
  2686. // There might be assume intrinsics dominating this return that completely
  2687. // determine the value. If so, constant fold it.
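// For example (illustrative), if a dominating llvm.assume guarantees that
// %x == 42 (so every bit of %x is known), then "ret i32 %x" is rewritten to
// return the constant 42.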
  2688. KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
  2689. if (Known.isConstant())
  2690. return replaceOperand(RI, 0,
  2691. Constant::getIntegerValue(VTy, Known.getConstant()));
  2692. return nullptr;
  2693. }
  2694. // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
  2695. Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
  2696. // Try to remove the previous instruction if it must lead to unreachable.
  2697. // This includes instructions like stores and "llvm.assume" that may not get
  2698. // removed by simple dead code elimination.
  2699. while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
  2700. // While we theoretically can erase EH, that would result in a block that
  2701. // used to start with an EH no longer starting with EH, which is invalid.
  2702. // To make it valid, we'd need to fixup predecessors to no longer refer to
  2703. // this block, but that changes CFG, which is not allowed in InstCombine.
  2704. if (Prev->isEHPad())
2705. return nullptr; // Cannot drop any more instructions. We're done here.
2706. if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
2707. return nullptr; // Cannot drop any more instructions. We're done here.
  2708. // Otherwise, this instruction can be freely erased,
  2709. // even if it is not side-effect free.
  2710. // A value may still have uses before we process it here (for example, in
  2711. // another unreachable block), so convert those to poison.
  2712. replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
  2713. eraseInstFromFunction(*Prev);
  2714. }
  2715. assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty.");
  2716. // FIXME: recurse into unconditional predecessors?
  2717. return nullptr;
  2718. }
  2719. Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
  2720. assert(BI.isUnconditional() && "Only for unconditional branches.");
2721. // If the second-to-last instruction in this basic block (excluding debug
2722. // info and bitcasts of pointers) is a store, and the block ends with an
2723. // unconditional branch, try to move the store to the successor block.
  2724. auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
  2725. auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
  2726. return BBI->isDebugOrPseudoInst() ||
  2727. (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
  2728. };
  2729. BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
  2730. do {
  2731. if (BBI != FirstInstr)
  2732. --BBI;
  2733. } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI));
  2734. return dyn_cast<StoreInst>(BBI);
  2735. };
  2736. if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
  2737. if (mergeStoreIntoSuccessor(*SI))
  2738. return &BI;
  2739. return nullptr;
  2740. }
  2741. Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
  2742. if (BI.isUnconditional())
  2743. return visitUnconditionalBranchInst(BI);
  2744. // Change br (not X), label True, label False to: br X, label False, True
  2745. Value *X = nullptr;
  2746. if (match(&BI, m_Br(m_Not(m_Value(X)), m_BasicBlock(), m_BasicBlock())) &&
  2747. !isa<Constant>(X)) {
  2748. // Swap Destinations and condition...
  2749. BI.swapSuccessors();
  2750. return replaceOperand(BI, 0, X);
  2751. }
  2752. // If the condition is irrelevant, remove the use so that other
  2753. // transforms on the condition become more effective.
  2754. if (!isa<ConstantInt>(BI.getCondition()) &&
  2755. BI.getSuccessor(0) == BI.getSuccessor(1))
  2756. return replaceOperand(
  2757. BI, 0, ConstantInt::getFalse(BI.getCondition()->getType()));
  2758. // Canonicalize, for example, fcmp_one -> fcmp_oeq.
  2759. CmpInst::Predicate Pred;
  2760. if (match(&BI, m_Br(m_OneUse(m_FCmp(Pred, m_Value(), m_Value())),
  2761. m_BasicBlock(), m_BasicBlock())) &&
  2762. !isCanonicalPredicate(Pred)) {
  2763. // Swap destinations and condition.
  2764. CmpInst *Cond = cast<CmpInst>(BI.getCondition());
  2765. Cond->setPredicate(CmpInst::getInversePredicate(Pred));
  2766. BI.swapSuccessors();
  2767. Worklist.push(Cond);
  2768. return &BI;
  2769. }
  2770. return nullptr;
  2771. }
  2772. Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
  2773. Value *Cond = SI.getCondition();
  2774. Value *Op0;
  2775. ConstantInt *AddRHS;
  2776. if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
  2777. // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
  2778. for (auto Case : SI.cases()) {
  2779. Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
  2780. assert(isa<ConstantInt>(NewCase) &&
  2781. "Result of expression should be constant");
  2782. Case.setValue(cast<ConstantInt>(NewCase));
  2783. }
  2784. return replaceOperand(SI, 0, Op0);
  2785. }
  2786. KnownBits Known = computeKnownBits(Cond, 0, &SI);
  2787. unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
  2788. unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
  2789. // Compute the number of leading bits we can ignore.
  2790. // TODO: A better way to determine this would use ComputeNumSignBits().
  2791. for (auto &C : SI.cases()) {
  2792. LeadingKnownZeros = std::min(
  2793. LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
  2794. LeadingKnownOnes = std::min(
  2795. LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
  2796. }
  2797. unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
  2798. // Shrink the condition operand if the new type is smaller than the old type.
2799. // But do not shrink to a non-standard type, because the backend can't
2800. // generate good code for that yet.
  2801. // TODO: We can make it aggressive again after fixing PR39569.
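// For example (illustrative), if every case value of a switch on an i32
// condition fits in 8 bits and the upper 24 bits of the condition are known
// zero, the condition is truncated to i8 and each case value is truncated to
// match.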
  2802. if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
  2803. shouldChangeType(Known.getBitWidth(), NewWidth)) {
  2804. IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
  2805. Builder.SetInsertPoint(&SI);
  2806. Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
  2807. for (auto Case : SI.cases()) {
  2808. APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
  2809. Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
  2810. }
  2811. return replaceOperand(SI, 0, NewCond);
  2812. }
  2813. return nullptr;
  2814. }
  2815. Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
  2816. Value *Agg = EV.getAggregateOperand();
  2817. if (!EV.hasIndices())
  2818. return replaceInstUsesWith(EV, Agg);
  2819. if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(),
  2820. SQ.getWithInstruction(&EV)))
  2821. return replaceInstUsesWith(EV, V);
  2822. if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
  2823. // We're extracting from an insertvalue instruction, compare the indices
  2824. const unsigned *exti, *exte, *insi, *inse;
  2825. for (exti = EV.idx_begin(), insi = IV->idx_begin(),
  2826. exte = EV.idx_end(), inse = IV->idx_end();
  2827. exti != exte && insi != inse;
  2828. ++exti, ++insi) {
  2829. if (*insi != *exti)
  2830. // The insert and extract both reference distinctly different elements.
  2831. // This means the extract is not influenced by the insert, and we can
  2832. // replace the aggregate operand of the extract with the aggregate
  2833. // operand of the insert. i.e., replace
  2834. // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
  2835. // %E = extractvalue { i32, { i32 } } %I, 0
  2836. // with
  2837. // %E = extractvalue { i32, { i32 } } %A, 0
  2838. return ExtractValueInst::Create(IV->getAggregateOperand(),
  2839. EV.getIndices());
  2840. }
  2841. if (exti == exte && insi == inse)
  2842. // Both iterators are at the end: Index lists are identical. Replace
  2843. // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
  2844. // %C = extractvalue { i32, { i32 } } %B, 1, 0
  2845. // with "i32 42"
  2846. return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
  2847. if (exti == exte) {
  2848. // The extract list is a prefix of the insert list. i.e. replace
  2849. // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
  2850. // %E = extractvalue { i32, { i32 } } %I, 1
  2851. // with
  2852. // %X = extractvalue { i32, { i32 } } %A, 1
  2853. // %E = insertvalue { i32 } %X, i32 42, 0
  2854. // by switching the order of the insert and extract (though the
  2855. // insertvalue should be left in, since it may have other uses).
  2856. Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
  2857. EV.getIndices());
  2858. return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
  2859. makeArrayRef(insi, inse));
  2860. }
  2861. if (insi == inse)
  2862. // The insert list is a prefix of the extract list
  2863. // We can simply remove the common indices from the extract and make it
  2864. // operate on the inserted value instead of the insertvalue result.
  2865. // i.e., replace
  2866. // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
  2867. // %E = extractvalue { i32, { i32 } } %I, 1, 0
  2868. // with
  2869. // %E extractvalue { i32 } { i32 42 }, 0
  2870. return ExtractValueInst::Create(IV->getInsertedValueOperand(),
  2871. makeArrayRef(exti, exte));
  2872. }
  2873. if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
2874. // We're extracting from an overflow intrinsic. See if we're the only user;
2875. // if so, we can simplify this multiple-result intrinsic to a simpler
2876. // instruction that produces just the one value we need.
  2877. if (WO->hasOneUse()) {
  2878. // Check if we're grabbing only the result of a 'with overflow' intrinsic
  2879. // and replace it with a traditional binary instruction.
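// For example (illustrative):
//   %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %v = extractvalue { i32, i1 } %s, 0
// becomes "%v = add i32 %a, %b" when the intrinsic has no other users.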
  2880. if (*EV.idx_begin() == 0) {
  2881. Instruction::BinaryOps BinOp = WO->getBinaryOp();
  2882. Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
  2883. // Replace the old instruction's uses with poison.
  2884. replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
  2885. eraseInstFromFunction(*WO);
  2886. return BinaryOperator::Create(BinOp, LHS, RHS);
  2887. }
  2888. assert(*EV.idx_begin() == 1 &&
  2889. "unexpected extract index for overflow inst");
  2890. // If only the overflow result is used, and the right hand side is a
  2891. // constant (or constant splat), we can remove the intrinsic by directly
  2892. // checking for overflow.
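// For example (illustrative):
//   %s  = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %x, i8 1)
//   %ov = extractvalue { i8, i1 } %s, 1
// The intrinsic is removed and %ov is computed directly as an icmp of %x
// against a constant (here, overflow happens exactly when %x == 255).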
  2893. const APInt *C;
  2894. if (match(WO->getRHS(), m_APInt(C))) {
  2895. // Compute the no-wrap range for LHS given RHS=C, then construct an
  2896. // equivalent icmp, potentially using an offset.
  2897. ConstantRange NWR =
  2898. ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C,
  2899. WO->getNoWrapKind());
  2900. CmpInst::Predicate Pred;
  2901. APInt NewRHSC, Offset;
  2902. NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
  2903. auto *OpTy = WO->getRHS()->getType();
  2904. auto *NewLHS = WO->getLHS();
  2905. if (Offset != 0)
  2906. NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
  2907. return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
  2908. ConstantInt::get(OpTy, NewRHSC));
  2909. }
  2910. }
  2911. }
  2912. if (LoadInst *L = dyn_cast<LoadInst>(Agg))
  2913. // If the (non-volatile) load only has one use, we can rewrite this to a
  2914. // load from a GEP. This reduces the size of the load. If a load is used
  2915. // only by extractvalue instructions then this either must have been
  2916. // optimized before, or it is a struct with padding, in which case we
  2917. // don't want to do the transformation as it loses padding knowledge.
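// For example (illustrative):
//   %agg = load { i32, i64 }, { i32, i64 }* %p
//   %v   = extractvalue { i32, i64 } %agg, 1
// becomes
//   %gep = getelementptr inbounds { i32, i64 }, { i32, i64 }* %p, i32 0, i32 1
//   %v   = load i64, i64* %gep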
  2918. if (L->isSimple() && L->hasOneUse()) {
  2919. // extractvalue has integer indices, getelementptr has Value*s. Convert.
  2920. SmallVector<Value*, 4> Indices;
  2921. // Prefix an i32 0 since we need the first element.
  2922. Indices.push_back(Builder.getInt32(0));
  2923. for (unsigned Idx : EV.indices())
  2924. Indices.push_back(Builder.getInt32(Idx));
  2925. // We need to insert these at the location of the old load, not at that of
  2926. // the extractvalue.
  2927. Builder.SetInsertPoint(L);
  2928. Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
  2929. L->getPointerOperand(), Indices);
  2930. Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
2931. // Whatever aliasing information we had for the original load must also
  2932. // hold for the smaller load, so propagate the annotations.
  2933. NL->setAAMetadata(L->getAAMetadata());
  2934. // Returning the load directly will cause the main loop to insert it in
  2935. // the wrong spot, so use replaceInstUsesWith().
  2936. return replaceInstUsesWith(EV, NL);
  2937. }
  2938. // We could simplify extracts from other values. Note that nested extracts may
  2939. // already be simplified implicitly by the above: extract (extract (insert) )
  2940. // will be translated into extract ( insert ( extract ) ) first and then just
  2941. // the value inserted, if appropriate. Similarly for extracts from single-use
  2942. // loads: extract (extract (load)) will be translated to extract (load (gep))
  2943. // and if again single-use then via load (gep (gep)) to load (gep).
  2944. // However, double extracts from e.g. function arguments or return values
  2945. // aren't handled yet.
  2946. return nullptr;
  2947. }
  2948. /// Return 'true' if the given typeinfo will match anything.
  2949. static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
  2950. switch (Personality) {
  2951. case EHPersonality::GNU_C:
  2952. case EHPersonality::GNU_C_SjLj:
  2953. case EHPersonality::Rust:
2954. // The GCC C EH and Rust personalities only exist to support cleanups, so
  2955. // it's not clear what the semantics of catch clauses are.
  2956. return false;
  2957. case EHPersonality::Unknown:
  2958. return false;
  2959. case EHPersonality::GNU_Ada:
  2960. // While __gnat_all_others_value will match any Ada exception, it doesn't
  2961. // match foreign exceptions (or didn't, before gcc-4.7).
  2962. return false;
  2963. case EHPersonality::GNU_CXX:
  2964. case EHPersonality::GNU_CXX_SjLj:
  2965. case EHPersonality::GNU_ObjC:
  2966. case EHPersonality::MSVC_X86SEH:
  2967. case EHPersonality::MSVC_TableSEH:
  2968. case EHPersonality::MSVC_CXX:
  2969. case EHPersonality::CoreCLR:
  2970. case EHPersonality::Wasm_CXX:
  2971. case EHPersonality::XL_CXX:
  2972. return TypeInfo->isNullValue();
  2973. }
  2974. llvm_unreachable("invalid enum");
  2975. }
  2976. static bool shorter_filter(const Value *LHS, const Value *RHS) {
  2977. return
  2978. cast<ArrayType>(LHS->getType())->getNumElements()
  2979. <
  2980. cast<ArrayType>(RHS->getType())->getNumElements();
  2981. }
  2982. Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
  2983. // The logic here should be correct for any real-world personality function.
  2984. // However if that turns out not to be true, the offending logic can always
  2985. // be conditioned on the personality function, like the catch-all logic is.
  2986. EHPersonality Personality =
  2987. classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
2988. // Simplify the list of clauses, e.g. by removing repeated catch clauses
  2989. // (these are often created by inlining).
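// For example (illustrative), after inlining a landingpad may end up with two
// identical "catch i8* bitcast (i8** @_ZTIi to i8*)" clauses; the repeated
// clause is dropped and the landingpad is rebuilt without it.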
  2990. bool MakeNewInstruction = false; // If true, recreate using the following:
  2991. SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
  2992. bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
  2993. SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
  2994. for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
  2995. bool isLastClause = i + 1 == e;
  2996. if (LI.isCatch(i)) {
  2997. // A catch clause.
  2998. Constant *CatchClause = LI.getClause(i);
  2999. Constant *TypeInfo = CatchClause->stripPointerCasts();
  3000. // If we already saw this clause, there is no point in having a second
  3001. // copy of it.
  3002. if (AlreadyCaught.insert(TypeInfo).second) {
  3003. // This catch clause was not already seen.
  3004. NewClauses.push_back(CatchClause);
  3005. } else {
  3006. // Repeated catch clause - drop the redundant copy.
  3007. MakeNewInstruction = true;
  3008. }
  3009. // If this is a catch-all then there is no point in keeping any following
  3010. // clauses or marking the landingpad as having a cleanup.
  3011. if (isCatchAll(Personality, TypeInfo)) {
  3012. if (!isLastClause)
  3013. MakeNewInstruction = true;
  3014. CleanupFlag = false;
  3015. break;
  3016. }
  3017. } else {
  3018. // A filter clause. If any of the filter elements were already caught
  3019. // then they can be dropped from the filter. It is tempting to try to
  3020. // exploit the filter further by saying that any typeinfo that does not
  3021. // occur in the filter can't be caught later (and thus can be dropped).
  3022. // However this would be wrong, since typeinfos can match without being
  3023. // equal (for example if one represents a C++ class, and the other some
  3024. // class derived from it).
  3025. assert(LI.isFilter(i) && "Unsupported landingpad clause!");
  3026. Constant *FilterClause = LI.getClause(i);
  3027. ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
  3028. unsigned NumTypeInfos = FilterType->getNumElements();
  3029. // An empty filter catches everything, so there is no point in keeping any
  3030. // following clauses or marking the landingpad as having a cleanup. By
  3031. // dealing with this case here the following code is made a bit simpler.
  3032. if (!NumTypeInfos) {
  3033. NewClauses.push_back(FilterClause);
  3034. if (!isLastClause)
  3035. MakeNewInstruction = true;
  3036. CleanupFlag = false;
  3037. break;
  3038. }
  3039. bool MakeNewFilter = false; // If true, make a new filter.
  3040. SmallVector<Constant *, 16> NewFilterElts; // New elements.
  3041. if (isa<ConstantAggregateZero>(FilterClause)) {
  3042. // Not an empty filter - it contains at least one null typeinfo.
  3043. assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
  3044. Constant *TypeInfo =
  3045. Constant::getNullValue(FilterType->getElementType());
  3046. // If this typeinfo is a catch-all then the filter can never match.
  3047. if (isCatchAll(Personality, TypeInfo)) {
  3048. // Throw the filter away.
  3049. MakeNewInstruction = true;
  3050. continue;
  3051. }
  3052. // There is no point in having multiple copies of this typeinfo, so
  3053. // discard all but the first copy if there is more than one.
  3054. NewFilterElts.push_back(TypeInfo);
  3055. if (NumTypeInfos > 1)
  3056. MakeNewFilter = true;
  3057. } else {
  3058. ConstantArray *Filter = cast<ConstantArray>(FilterClause);
  3059. SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
  3060. NewFilterElts.reserve(NumTypeInfos);
  3061. // Remove any filter elements that were already caught or that already
  3062. // occurred in the filter. While there, see if any of the elements are
  3063. // catch-alls. If so, the filter can be discarded.
  3064. bool SawCatchAll = false;
  3065. for (unsigned j = 0; j != NumTypeInfos; ++j) {
  3066. Constant *Elt = Filter->getOperand(j);
  3067. Constant *TypeInfo = Elt->stripPointerCasts();
  3068. if (isCatchAll(Personality, TypeInfo)) {
  3069. // This element is a catch-all. Bail out, noting this fact.
  3070. SawCatchAll = true;
  3071. break;
  3072. }
  3073. // Even if we've seen a type in a catch clause, we don't want to
  3074. // remove it from the filter. An unexpected type handler may be
  3075. // set up for a call site which throws an exception of the same
  3076. // type caught. In order for the exception thrown by the unexpected
  3077. // handler to propagate correctly, the filter must be correctly
  3078. // described for the call site.
  3079. //
  3080. // Example:
  3081. //
  3082. // void unexpected() { throw 1;}
  3083. // void foo() throw (int) {
  3084. // std::set_unexpected(unexpected);
  3085. // try {
  3086. // throw 2.0;
  3087. // } catch (int i) {}
  3088. // }
  3089. // There is no point in having multiple copies of the same typeinfo in
  3090. // a filter, so only add it if we didn't already.
  3091. if (SeenInFilter.insert(TypeInfo).second)
  3092. NewFilterElts.push_back(cast<Constant>(Elt));
  3093. }
  3094. // A filter containing a catch-all cannot match anything by definition.
  3095. if (SawCatchAll) {
  3096. // Throw the filter away.
  3097. MakeNewInstruction = true;
  3098. continue;
  3099. }
  3100. // If we dropped something from the filter, make a new one.
  3101. if (NewFilterElts.size() < NumTypeInfos)
  3102. MakeNewFilter = true;
  3103. }
  3104. if (MakeNewFilter) {
  3105. FilterType = ArrayType::get(FilterType->getElementType(),
  3106. NewFilterElts.size());
  3107. FilterClause = ConstantArray::get(FilterType, NewFilterElts);
  3108. MakeNewInstruction = true;
  3109. }
  3110. NewClauses.push_back(FilterClause);
  3111. // If the new filter is empty then it will catch everything so there is
  3112. // no point in keeping any following clauses or marking the landingpad
  3113. // as having a cleanup. The case of the original filter being empty was
  3114. // already handled above.
  3115. if (MakeNewFilter && !NewFilterElts.size()) {
  3116. assert(MakeNewInstruction && "New filter but not a new instruction!");
  3117. CleanupFlag = false;
  3118. break;
  3119. }
  3120. }
  3121. }
  3122. // If several filters occur in a row then reorder them so that the shortest
  3123. // filters come first (those with the smallest number of elements). This is
  3124. // advantageous because shorter filters are more likely to match, speeding up
  3125. // unwinding, but mostly because it increases the effectiveness of the other
  3126. // filter optimizations below.
  3127. for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
  3128. unsigned j;
  3129. // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
  3130. for (j = i; j != e; ++j)
  3131. if (!isa<ArrayType>(NewClauses[j]->getType()))
  3132. break;
  3133. // Check whether the filters are already sorted by length. We need to know
  3134. // if sorting them is actually going to do anything so that we only make a
  3135. // new landingpad instruction if it does.
  3136. for (unsigned k = i; k + 1 < j; ++k)
  3137. if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
  3138. // Not sorted, so sort the filters now. Doing an unstable sort would be
  3139. // correct too but reordering filters pointlessly might confuse users.
  3140. std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
  3141. shorter_filter);
  3142. MakeNewInstruction = true;
  3143. break;
  3144. }
  3145. // Look for the next batch of filters.
  3146. i = j + 1;
  3147. }
  3148. // If typeinfos matched if and only if equal, then the elements of a filter L
  3149. // that occurs later than a filter F could be replaced by the intersection of
  3150. // the elements of F and L. In reality two typeinfos can match without being
  3151. // equal (for example if one represents a C++ class, and the other some class
  3152. // derived from it) so it would be wrong to perform this transform in general.
  3153. // However the transform is correct and useful if F is a subset of L. In that
  3154. // case L can be replaced by F, and thus removed altogether since repeating a
  3155. // filter is pointless. So here we look at all pairs of filters F and L where
  3156. // L follows F in the list of clauses, and remove L if every element of F is
  3157. // an element of L. This can occur when inlining C++ functions with exception
  3158. // specifications.
  3159. for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
  3160. // Examine each filter in turn.
  3161. Value *Filter = NewClauses[i];
  3162. ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
  3163. if (!FTy)
  3164. // Not a filter - skip it.
  3165. continue;
  3166. unsigned FElts = FTy->getNumElements();
  3167. // Examine each filter following this one. Doing this backwards means that
  3168. // we don't have to worry about filters disappearing under us when removed.
  3169. for (unsigned j = NewClauses.size() - 1; j != i; --j) {
  3170. Value *LFilter = NewClauses[j];
  3171. ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
  3172. if (!LTy)
  3173. // Not a filter - skip it.
  3174. continue;
  3175. // If Filter is a subset of LFilter, i.e. every element of Filter is also
  3176. // an element of LFilter, then discard LFilter.
  3177. SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
  3178. // If Filter is empty then it is a subset of LFilter.
  3179. if (!FElts) {
  3180. // Discard LFilter.
  3181. NewClauses.erase(J);
  3182. MakeNewInstruction = true;
  3183. // Move on to the next filter.
  3184. continue;
  3185. }
  3186. unsigned LElts = LTy->getNumElements();
  3187. // If Filter is longer than LFilter then it cannot be a subset of it.
  3188. if (FElts > LElts)
  3189. // Move on to the next filter.
  3190. continue;
  3191. // At this point we know that LFilter has at least one element.
  3192. if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
  3193. // Filter is a subset of LFilter iff Filter contains only zeros (as we
  3194. // already know that Filter is not longer than LFilter).
  3195. if (isa<ConstantAggregateZero>(Filter)) {
  3196. assert(FElts <= LElts && "Should have handled this case earlier!");
  3197. // Discard LFilter.
  3198. NewClauses.erase(J);
  3199. MakeNewInstruction = true;
  3200. }
  3201. // Move on to the next filter.
  3202. continue;
  3203. }
  3204. ConstantArray *LArray = cast<ConstantArray>(LFilter);
  3205. if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
  3206. // Since Filter is non-empty and contains only zeros, it is a subset of
  3207. // LFilter iff LFilter contains a zero.
  3208. assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
  3209. for (unsigned l = 0; l != LElts; ++l)
  3210. if (LArray->getOperand(l)->isNullValue()) {
  3211. // LFilter contains a zero - discard it.
  3212. NewClauses.erase(J);
  3213. MakeNewInstruction = true;
  3214. break;
  3215. }
  3216. // Move on to the next filter.
  3217. continue;
  3218. }
  3219. // At this point we know that both filters are ConstantArrays. Loop over
  3220. // operands to see whether every element of Filter is also an element of
  3221. // LFilter. Since filters tend to be short this is probably faster than
  3222. // using a method that scales nicely.
  3223. ConstantArray *FArray = cast<ConstantArray>(Filter);
  3224. bool AllFound = true;
  3225. for (unsigned f = 0; f != FElts; ++f) {
  3226. Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
  3227. AllFound = false;
  3228. for (unsigned l = 0; l != LElts; ++l) {
  3229. Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
  3230. if (LTypeInfo == FTypeInfo) {
  3231. AllFound = true;
  3232. break;
  3233. }
  3234. }
  3235. if (!AllFound)
  3236. break;
  3237. }
  3238. if (AllFound) {
  3239. // Discard LFilter.
  3240. NewClauses.erase(J);
  3241. MakeNewInstruction = true;
  3242. }
  3243. // Move on to the next filter.
  3244. }
  3245. }
  3246. // If we changed any of the clauses, replace the old landingpad instruction
  3247. // with a new one.
  3248. if (MakeNewInstruction) {
  3249. LandingPadInst *NLI = LandingPadInst::Create(LI.getType(),
  3250. NewClauses.size());
  3251. for (unsigned i = 0, e = NewClauses.size(); i != e; ++i)
  3252. NLI->addClause(NewClauses[i]);
  3253. // A landing pad with no clauses must have the cleanup flag set. It is
  3254. // theoretically possible, though highly unlikely, that we eliminated all
  3255. // clauses. If so, force the cleanup flag to true.
  3256. if (NewClauses.empty())
  3257. CleanupFlag = true;
  3258. NLI->setCleanup(CleanupFlag);
  3259. return NLI;
  3260. }
  3261. // Even if none of the clauses changed, we may nonetheless have understood
  3262. // that the cleanup flag is pointless. Clear it if so.
  3263. if (LI.isCleanup() != CleanupFlag) {
  3264. assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
  3265. LI.setCleanup(CleanupFlag);
  3266. return &LI;
  3267. }
  3268. return nullptr;
  3269. }
  3270. Value *
  3271. InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
  3272. // Try to push freeze through instructions that propagate but don't produce
  3273. // poison as far as possible. If an operand of freeze follows three
  3274. // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
  3275. // guaranteed-non-poison operands then push the freeze through to the one
  3276. // operand that is not guaranteed non-poison. The actual transform is as
  3277. // follows.
3278. // Op1 = ... ; Op1 can be poison
3279. // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only one
3280. // ; operand that is not guaranteed non-poison
  3281. // ... = Freeze(Op0)
  3282. // =>
  3283. // Op1 = ...
  3284. // Op1.fr = Freeze(Op1)
  3285. // ... = Inst(Op1.fr, NonPoisonOps...)
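// A concrete shape of the schema above, for illustration only (hypothetical
// values %x, %a, %f):
//   %a = add nsw i32 %x, 1     ; %x may be poison, the constant is not
//   %f = freeze i32 %a
// =>
//   %x.fr = freeze i32 %x
//   %a = add i32 %x.fr, 1      ; nsw dropped, since only the freeze used %a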
  3286. auto *OrigOp = OrigFI.getOperand(0);
  3287. auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
  3288. // While we could change the other users of OrigOp to use freeze(OrigOp), that
3289. // could reduce their optimization potential, so only do this when OrigOp is
3290. // used solely by the freeze.
  3291. if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
  3292. return nullptr;
  3293. // We can't push the freeze through an instruction which can itself create
  3294. // poison. If the only source of new poison is flags, we can simply
  3295. // strip them (since we know the only use is the freeze and nothing can
3296. // benefit from them).
  3297. if (canCreateUndefOrPoison(cast<Operator>(OrigOp), /*ConsiderFlags*/ false))
  3298. return nullptr;
3299. // If an operand is guaranteed not to be poison, there is no need to add freeze
  3300. // to the operand. So we first find the operand that is not guaranteed to be
  3301. // poison.
  3302. Use *MaybePoisonOperand = nullptr;
  3303. for (Use &U : OrigOpInst->operands()) {
  3304. if (isGuaranteedNotToBeUndefOrPoison(U.get()))
  3305. continue;
  3306. if (!MaybePoisonOperand)
  3307. MaybePoisonOperand = &U;
  3308. else
  3309. return nullptr;
  3310. }
  3311. OrigOpInst->dropPoisonGeneratingFlags();
  3312. // If all operands are guaranteed to be non-poison, we can drop freeze.
  3313. if (!MaybePoisonOperand)
  3314. return OrigOp;
  3315. auto *FrozenMaybePoisonOperand = new FreezeInst(
  3316. MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
  3317. replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
  3318. FrozenMaybePoisonOperand->insertBefore(OrigOpInst);
  3319. return OrigOp;
  3320. }
  3321. bool InstCombinerImpl::freezeDominatedUses(FreezeInst &FI) {
  3322. Value *Op = FI.getOperand(0);
  3323. if (isa<Constant>(Op))
  3324. return false;
  3325. bool Changed = false;
  3326. Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
  3327. bool Dominates = DT.dominates(&FI, U);
  3328. Changed |= Dominates;
  3329. return Dominates;
  3330. });
  3331. return Changed;
  3332. }
  3333. Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
  3334. Value *Op0 = I.getOperand(0);
  3335. if (Value *V = SimplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
  3336. return replaceInstUsesWith(I, V);
  3337. // freeze (phi const, x) --> phi const, (freeze x)
  3338. if (auto *PN = dyn_cast<PHINode>(Op0)) {
  3339. if (Instruction *NV = foldOpIntoPhi(I, PN))
  3340. return NV;
  3341. }
  3342. if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
  3343. return replaceInstUsesWith(I, NI);
  3344. if (match(Op0, m_Undef())) {
  3345. // If I is freeze(undef), see its uses and fold it to the best constant.
  3346. // - or: pick -1
  3347. // - select's condition: pick the value that leads to choosing a constant
  3348. // - other ops: pick 0
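// For illustration (hypothetical IR, not from the original source):
//   %f = freeze i32 undef
//   %r = or i32 %f, %v
// With the 'or' as the only user, %f is folded to -1 so the 'or' can simplify
// further; if several users disagree on the best constant, the null value is
// used instead.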
  3349. Constant *BestValue = nullptr;
  3350. Constant *NullValue = Constant::getNullValue(I.getType());
  3351. for (const auto *U : I.users()) {
  3352. Constant *C = NullValue;
  3353. if (match(U, m_Or(m_Value(), m_Value())))
  3354. C = Constant::getAllOnesValue(I.getType());
  3355. else if (const auto *SI = dyn_cast<SelectInst>(U)) {
  3356. if (SI->getCondition() == &I) {
  3357. APInt CondVal(1, isa<Constant>(SI->getFalseValue()) ? 0 : 1);
  3358. C = Constant::getIntegerValue(I.getType(), CondVal);
  3359. }
  3360. }
  3361. if (!BestValue)
  3362. BestValue = C;
  3363. else if (BestValue != C)
  3364. BestValue = NullValue;
  3365. }
  3366. return replaceInstUsesWith(I, BestValue);
  3367. }
  3368. // Replace all dominated uses of Op to freeze(Op).
  3369. if (freezeDominatedUses(I))
  3370. return &I;
  3371. return nullptr;
  3372. }
3373. /// Check for the case where the call writes to an otherwise dead alloca. This
  3374. /// shows up for unused out-params in idiomatic C/C++ code. Note that this
  3375. /// helper *only* analyzes the write; doesn't check any other legality aspect.
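/// For illustration (hypothetical C, names are made up):
///   int unused;                  // lowers to an otherwise-dead alloca
///   compute(&result, &unused);   // the call's only effect on it is a write
/// Since nothing ever reads 'unused', the write into it is irrelevant.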
  3376. static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
  3377. auto *CB = dyn_cast<CallBase>(I);
  3378. if (!CB)
  3379. // TODO: handle e.g. store to alloca here - only worth doing if we extend
  3380. // to allow reload along used path as described below. Otherwise, this
  3381. // is simply a store to a dead allocation which will be removed.
  3382. return false;
  3383. Optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
  3384. if (!Dest)
  3385. return false;
  3386. auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
  3387. if (!AI)
  3388. // TODO: allow malloc?
  3389. return false;
  3390. // TODO: allow memory access dominated by move point? Note that since AI
  3391. // could have a reference to itself captured by the call, we would need to
  3392. // account for cycles in doing so.
  3393. SmallVector<const User *> AllocaUsers;
  3394. SmallPtrSet<const User *, 4> Visited;
  3395. auto pushUsers = [&](const Instruction &I) {
  3396. for (const User *U : I.users()) {
  3397. if (Visited.insert(U).second)
  3398. AllocaUsers.push_back(U);
  3399. }
  3400. };
  3401. pushUsers(*AI);
  3402. while (!AllocaUsers.empty()) {
  3403. auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
  3404. if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
  3405. isa<AddrSpaceCastInst>(UserI)) {
  3406. pushUsers(*UserI);
  3407. continue;
  3408. }
  3409. if (UserI == CB)
  3410. continue;
  3411. // TODO: support lifetime.start/end here
  3412. return false;
  3413. }
  3414. return true;
  3415. }
  3416. /// Try to move the specified instruction from its current block into the
  3417. /// beginning of DestBlock, which can only happen if it's safe to move the
  3418. /// instruction past all of the instructions between it and the end of its
  3419. /// block.
  3420. static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
  3421. TargetLibraryInfo &TLI) {
  3422. assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!");
  3423. BasicBlock *SrcBlock = I->getParent();
3424. // Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
  3425. if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
  3426. I->isTerminator())
  3427. return false;
  3428. // Do not sink static or dynamic alloca instructions. Static allocas must
  3429. // remain in the entry block, and dynamic allocas must not be sunk in between
  3430. // a stacksave / stackrestore pair, which would incorrectly shorten its
  3431. // lifetime.
  3432. if (isa<AllocaInst>(I))
  3433. return false;
  3434. // Do not sink into catchswitch blocks.
  3435. if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
  3436. return false;
  3437. // Do not sink convergent call instructions.
  3438. if (auto *CI = dyn_cast<CallInst>(I)) {
  3439. if (CI->isConvergent())
  3440. return false;
  3441. }
3442. // Unless we can prove that the memory write isn't visible except on the
  3443. // path we're sinking to, we must bail.
  3444. if (I->mayWriteToMemory()) {
  3445. if (!SoleWriteToDeadLocal(I, TLI))
  3446. return false;
  3447. }
  3448. // We can only sink load instructions if there is nothing between the load and
  3449. // the end of block that could change the value.
  3450. if (I->mayReadFromMemory()) {
  3451. // We don't want to do any sophisticated alias analysis, so we only check
  3452. // the instructions after I in I's parent block if we try to sink to its
  3453. // successor block.
  3454. if (DestBlock->getUniquePredecessor() != I->getParent())
  3455. return false;
  3456. for (BasicBlock::iterator Scan = std::next(I->getIterator()),
  3457. E = I->getParent()->end();
  3458. Scan != E; ++Scan)
  3459. if (Scan->mayWriteToMemory())
  3460. return false;
  3461. }
  3462. I->dropDroppableUses([DestBlock](const Use *U) {
  3463. if (auto *I = dyn_cast<Instruction>(U->getUser()))
  3464. return I->getParent() != DestBlock;
  3465. return true;
  3466. });
  3467. /// FIXME: We could remove droppable uses that are not dominated by
  3468. /// the new position.
  3469. BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
  3470. I->moveBefore(&*InsertPos);
  3471. ++NumSunkInst;
  3472. // Also sink all related debug uses from the source basic block. Otherwise we
3473. // get a debug use before the def. Attempt to salvage debug uses first, to
3474. // maximise the range over which variables have a location. If we cannot
3475. // salvage, then mark the location undef: we know it was supposed to receive
3476. // a new location here, but that computation has been sunk.
  3477. SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
  3478. findDbgUsers(DbgUsers, I);
  3479. // Process the sinking DbgUsers in reverse order, as we only want to clone the
  3480. // last appearing debug intrinsic for each given variable.
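// For example (hypothetical): if SrcBlock holds two dbg.value users of I for
// the same variable, only the later one is cloned into DestBlock; the earlier
// one stays behind and is handled by the salvaging step below.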
  3481. SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
  3482. for (DbgVariableIntrinsic *DVI : DbgUsers)
  3483. if (DVI->getParent() == SrcBlock)
  3484. DbgUsersToSink.push_back(DVI);
  3485. llvm::sort(DbgUsersToSink,
  3486. [](auto *A, auto *B) { return B->comesBefore(A); });
  3487. SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
  3488. SmallSet<DebugVariable, 4> SunkVariables;
  3489. for (auto User : DbgUsersToSink) {
  3490. // A dbg.declare instruction should not be cloned, since there can only be
  3491. // one per variable fragment. It should be left in the original place
  3492. // because the sunk instruction is not an alloca (otherwise we could not be
  3493. // here).
  3494. if (isa<DbgDeclareInst>(User))
  3495. continue;
  3496. DebugVariable DbgUserVariable =
  3497. DebugVariable(User->getVariable(), User->getExpression(),
  3498. User->getDebugLoc()->getInlinedAt());
  3499. if (!SunkVariables.insert(DbgUserVariable).second)
  3500. continue;
  3501. DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
  3502. if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
  3503. DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
  3504. LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
  3505. }
  3506. // Perform salvaging without the clones, then sink the clones.
  3507. if (!DIIClones.empty()) {
  3508. salvageDebugInfoForDbgValues(*I, DbgUsers);
  3509. // The clones are in reverse order of original appearance, reverse again to
  3510. // maintain the original order.
  3511. for (auto &DIIClone : llvm::reverse(DIIClones)) {
  3512. DIIClone->insertBefore(&*InsertPos);
  3513. LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
  3514. }
  3515. }
  3516. return true;
  3517. }
  3518. bool InstCombinerImpl::run() {
  3519. while (!Worklist.isEmpty()) {
  3520. // Walk deferred instructions in reverse order, and push them to the
  3521. // worklist, which means they'll end up popped from the worklist in-order.
  3522. while (Instruction *I = Worklist.popDeferred()) {
  3523. // Check to see if we can DCE the instruction. We do this already here to
  3524. // reduce the number of uses and thus allow other folds to trigger.
  3525. // Note that eraseInstFromFunction() may push additional instructions on
  3526. // the deferred worklist, so this will DCE whole instruction chains.
  3527. if (isInstructionTriviallyDead(I, &TLI)) {
  3528. eraseInstFromFunction(*I);
  3529. ++NumDeadInst;
  3530. continue;
  3531. }
  3532. Worklist.push(I);
  3533. }
  3534. Instruction *I = Worklist.removeOne();
  3535. if (I == nullptr) continue; // skip null values.
  3536. // Check to see if we can DCE the instruction.
  3537. if (isInstructionTriviallyDead(I, &TLI)) {
  3538. eraseInstFromFunction(*I);
  3539. ++NumDeadInst;
  3540. continue;
  3541. }
  3542. if (!DebugCounter::shouldExecute(VisitCounter))
  3543. continue;
  3544. // Instruction isn't dead, see if we can constant propagate it.
  3545. if (!I->use_empty() &&
  3546. (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
  3547. if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
  3548. LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I
  3549. << '\n');
  3550. // Add operands to the worklist.
  3551. replaceInstUsesWith(*I, C);
  3552. ++NumConstProp;
  3553. if (isInstructionTriviallyDead(I, &TLI))
  3554. eraseInstFromFunction(*I);
  3555. MadeIRChange = true;
  3556. continue;
  3557. }
  3558. }
  3559. // See if we can trivially sink this instruction to its user if we can
  3560. // prove that the successor is not executed more frequently than our block.
  3561. // Return the UserBlock if successful.
  3562. auto getOptionalSinkBlockForInst =
  3563. [this](Instruction *I) -> Optional<BasicBlock *> {
  3564. if (!EnableCodeSinking)
  3565. return None;
  3566. auto *UserInst = cast_or_null<Instruction>(I->getUniqueUndroppableUser());
  3567. if (!UserInst)
  3568. return None;
  3569. BasicBlock *BB = I->getParent();
  3570. BasicBlock *UserParent = nullptr;
  3571. // Special handling for Phi nodes - get the block the use occurs in.
  3572. if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
  3573. for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
  3574. if (PN->getIncomingValue(i) == I) {
  3575. // Bail out if we have uses in different blocks. We don't do any
3576. // sophisticated analysis (i.e. finding the NearestCommonDominator of these
  3577. // use blocks).
  3578. if (UserParent && UserParent != PN->getIncomingBlock(i))
  3579. return None;
  3580. UserParent = PN->getIncomingBlock(i);
  3581. }
  3582. }
  3583. assert(UserParent && "expected to find user block!");
  3584. } else
  3585. UserParent = UserInst->getParent();
  3586. // Try sinking to another block. If that block is unreachable, then do
  3587. // not bother. SimplifyCFG should handle it.
  3588. if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
  3589. return None;
  3590. auto *Term = UserParent->getTerminator();
  3591. // See if the user is one of our successors that has only one
  3592. // predecessor, so that we don't have to split the critical edge.
  3593. // Another option where we can sink is a block that ends with a
  3594. // terminator that does not pass control to other block (such as
  3595. // return or unreachable or resume). In this case:
  3596. // - I dominates the User (by SSA form);
  3597. // - the User will be executed at most once.
  3598. // So sinking I down to User is always profitable or neutral.
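// Two shapes are accepted, sketched here with hypothetical blocks:
//   (a) BB: ... br i1 %c, label %U, label %E   ; BB is %U's only predecessor
//   (b) %U ends in ret/unreachable/resume, so it has no successors and runs
//       at most once after control reaches it.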
  3599. if (UserParent->getUniquePredecessor() == BB || succ_empty(Term)) {
  3600. assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
  3601. return UserParent;
  3602. }
  3603. return None;
  3604. };
  3605. auto OptBB = getOptionalSinkBlockForInst(I);
  3606. if (OptBB) {
  3607. auto *UserParent = *OptBB;
  3608. // Okay, the CFG is simple enough, try to sink this instruction.
  3609. if (TryToSinkInstruction(I, UserParent, TLI)) {
  3610. LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
  3611. MadeIRChange = true;
  3612. // We'll add uses of the sunk instruction below, but since
3613. // sinking can expose opportunities for its *operands*, add
3614. // them to the worklist.
  3615. for (Use &U : I->operands())
  3616. if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
  3617. Worklist.push(OpI);
  3618. }
  3619. }
  3620. // Now that we have an instruction, try combining it to simplify it.
  3621. Builder.SetInsertPoint(I);
  3622. Builder.CollectMetadataToCopy(
  3623. I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
  3624. #ifndef NDEBUG
  3625. std::string OrigI;
  3626. #endif
  3627. LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
  3628. LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
  3629. if (Instruction *Result = visit(*I)) {
  3630. ++NumCombined;
  3631. // Should we replace the old instruction with a new one?
  3632. if (Result != I) {
  3633. LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
  3634. << " New = " << *Result << '\n');
  3635. Result->copyMetadata(*I,
  3636. {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
  3637. // Everything uses the new instruction now.
  3638. I->replaceAllUsesWith(Result);
  3639. // Move the name to the new instruction first.
  3640. Result->takeName(I);
  3641. // Insert the new instruction into the basic block...
  3642. BasicBlock *InstParent = I->getParent();
  3643. BasicBlock::iterator InsertPos = I->getIterator();
3644. // Are we replacing a PHI with something that isn't a PHI, or vice versa?
  3645. if (isa<PHINode>(Result) != isa<PHINode>(I)) {
  3646. // We need to fix up the insertion point.
  3647. if (isa<PHINode>(I)) // PHI -> Non-PHI
  3648. InsertPos = InstParent->getFirstInsertionPt();
  3649. else // Non-PHI -> PHI
  3650. InsertPos = InstParent->getFirstNonPHI()->getIterator();
  3651. }
  3652. InstParent->getInstList().insert(InsertPos, Result);
  3653. // Push the new instruction and any users onto the worklist.
  3654. Worklist.pushUsersToWorkList(*Result);
  3655. Worklist.push(Result);
  3656. eraseInstFromFunction(*I);
  3657. } else {
  3658. LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
  3659. << " New = " << *I << '\n');
  3660. // If the instruction was modified, it's possible that it is now dead.
3661. // If so, remove it.
  3662. if (isInstructionTriviallyDead(I, &TLI)) {
  3663. eraseInstFromFunction(*I);
  3664. } else {
  3665. Worklist.pushUsersToWorkList(*I);
  3666. Worklist.push(I);
  3667. }
  3668. }
  3669. MadeIRChange = true;
  3670. }
  3671. }
  3672. Worklist.zap();
  3673. return MadeIRChange;
  3674. }
  3675. // Track the scopes used by !alias.scope and !noalias. In a function, a
  3676. // @llvm.experimental.noalias.scope.decl is only useful if that scope is used
  3677. // by both sets. If not, the declaration of the scope can be safely omitted.
  3678. // The MDNode of the scope can be omitted as well for the instructions that are
  3679. // part of this function. We do not do that at this point, as this might become
  3680. // too time consuming to do.
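// For example (hypothetical scope !1): a call to
// @llvm.experimental.noalias.scope.decl(metadata !{!1}) is kept only if !1
// shows up both in some instruction's !alias.scope list and in some !noalias
// list; if either side never mentions !1, the declaration is considered dead.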
  3681. class AliasScopeTracker {
  3682. SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
  3683. SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
  3684. public:
  3685. void analyse(Instruction *I) {
  3686. // This seems to be faster than checking 'mayReadOrWriteMemory()'.
  3687. if (!I->hasMetadataOtherThanDebugLoc())
  3688. return;
  3689. auto Track = [](Metadata *ScopeList, auto &Container) {
  3690. const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
  3691. if (!MDScopeList || !Container.insert(MDScopeList).second)
  3692. return;
  3693. for (auto &MDOperand : MDScopeList->operands())
  3694. if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
  3695. Container.insert(MDScope);
  3696. };
  3697. Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
  3698. Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
  3699. }
  3700. bool isNoAliasScopeDeclDead(Instruction *Inst) {
  3701. NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
  3702. if (!Decl)
  3703. return false;
  3704. assert(Decl->use_empty() &&
  3705. "llvm.experimental.noalias.scope.decl in use ?");
  3706. const MDNode *MDSL = Decl->getScopeList();
  3707. assert(MDSL->getNumOperands() == 1 &&
  3708. "llvm.experimental.noalias.scope should refer to a single scope");
  3709. auto &MDOperand = MDSL->getOperand(0);
  3710. if (auto *MD = dyn_cast<MDNode>(MDOperand))
  3711. return !UsedAliasScopesAndLists.contains(MD) ||
  3712. !UsedNoAliasScopesAndLists.contains(MD);
3713. // Not an MDNode? Throw it away.
  3714. return true;
  3715. }
  3716. };
  3717. /// Populate the IC worklist from a function, by walking it in depth-first
  3718. /// order and adding all reachable code to the worklist.
  3719. ///
  3720. /// This has a couple of tricks to make the code faster and more powerful. In
  3721. /// particular, we constant fold and DCE instructions as we go, to avoid adding
  3722. /// them to the worklist (this significantly speeds up instcombine on code where
  3723. /// many instructions are dead or constant). Additionally, if we find a branch
3724. /// whose condition is a known constant, we only visit the reachable successors.
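/// For instance (hypothetical IR): on 'br i1 true, label %then, label %else'
/// only %then is pushed onto the block worklist; %else is never visited and
/// its non-terminator instructions are deleted further below.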
  3725. static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
  3726. const TargetLibraryInfo *TLI,
  3727. InstructionWorklist &ICWorklist) {
  3728. bool MadeIRChange = false;
  3729. SmallPtrSet<BasicBlock *, 32> Visited;
  3730. SmallVector<BasicBlock*, 256> Worklist;
  3731. Worklist.push_back(&F.front());
  3732. SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
  3733. DenseMap<Constant *, Constant *> FoldedConstants;
  3734. AliasScopeTracker SeenAliasScopes;
  3735. do {
  3736. BasicBlock *BB = Worklist.pop_back_val();
  3737. // We have now visited this block! If we've already been here, ignore it.
  3738. if (!Visited.insert(BB).second)
  3739. continue;
  3740. for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
  3741. // ConstantProp instruction if trivially constant.
  3742. if (!Inst.use_empty() &&
  3743. (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
  3744. if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
  3745. LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
  3746. << '\n');
  3747. Inst.replaceAllUsesWith(C);
  3748. ++NumConstProp;
  3749. if (isInstructionTriviallyDead(&Inst, TLI))
  3750. Inst.eraseFromParent();
  3751. MadeIRChange = true;
  3752. continue;
  3753. }
  3754. // See if we can constant fold its operands.
  3755. for (Use &U : Inst.operands()) {
  3756. if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
  3757. continue;
  3758. auto *C = cast<Constant>(U);
  3759. Constant *&FoldRes = FoldedConstants[C];
  3760. if (!FoldRes)
  3761. FoldRes = ConstantFoldConstant(C, DL, TLI);
  3762. if (FoldRes != C) {
  3763. LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
  3764. << "\n Old = " << *C
  3765. << "\n New = " << *FoldRes << '\n');
  3766. U = FoldRes;
  3767. MadeIRChange = true;
  3768. }
  3769. }
  3770. // Skip processing debug and pseudo intrinsics in InstCombine. Processing
  3771. // these call instructions consumes non-trivial amount of time and
  3772. // provides no value for the optimization.
  3773. if (!Inst.isDebugOrPseudoInst()) {
  3774. InstrsForInstructionWorklist.push_back(&Inst);
  3775. SeenAliasScopes.analyse(&Inst);
  3776. }
  3777. }
  3778. // Recursively visit successors. If this is a branch or switch on a
  3779. // constant, only visit the reachable successor.
  3780. Instruction *TI = BB->getTerminator();
  3781. if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
  3782. if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
  3783. bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
  3784. BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
  3785. Worklist.push_back(ReachableBB);
  3786. continue;
  3787. }
  3788. } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
  3789. if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
  3790. Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
  3791. continue;
  3792. }
  3793. }
  3794. append_range(Worklist, successors(TI));
  3795. } while (!Worklist.empty());
  3796. // Remove instructions inside unreachable blocks. This prevents the
  3797. // instcombine code from having to deal with some bad special cases, and
  3798. // reduces use counts of instructions.
  3799. for (BasicBlock &BB : F) {
  3800. if (Visited.count(&BB))
  3801. continue;
  3802. unsigned NumDeadInstInBB;
  3803. unsigned NumDeadDbgInstInBB;
  3804. std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
  3805. removeAllNonTerminatorAndEHPadInstructions(&BB);
  3806. MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0;
  3807. NumDeadInst += NumDeadInstInBB;
  3808. }
  3809. // Once we've found all of the instructions to add to instcombine's worklist,
  3810. // add them in reverse order. This way instcombine will visit from the top
3811. // of the function down. This jibes well with the way that it adds all uses
  3812. // of instructions to the worklist after doing a transformation, thus avoiding
  3813. // some N^2 behavior in pathological cases.
  3814. ICWorklist.reserve(InstrsForInstructionWorklist.size());
  3815. for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
  3816. // DCE instruction if trivially dead. As we iterate in reverse program
  3817. // order here, we will clean up whole chains of dead instructions.
  3818. if (isInstructionTriviallyDead(Inst, TLI) ||
  3819. SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
  3820. ++NumDeadInst;
  3821. LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
  3822. salvageDebugInfo(*Inst);
  3823. Inst->eraseFromParent();
  3824. MadeIRChange = true;
  3825. continue;
  3826. }
  3827. ICWorklist.push(Inst);
  3828. }
  3829. return MadeIRChange;
  3830. }
  3831. static bool combineInstructionsOverFunction(
  3832. Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
  3833. AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
  3834. DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
  3835. ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
  3836. auto &DL = F.getParent()->getDataLayout();
  3837. MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
  3838. /// Builder - This is an IRBuilder that automatically inserts new
  3839. /// instructions into the worklist when they are created.
  3840. IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
  3841. F.getContext(), TargetFolder(DL),
  3842. IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
  3843. Worklist.add(I);
  3844. if (auto *Assume = dyn_cast<AssumeInst>(I))
  3845. AC.registerAssumption(Assume);
  3846. }));
3847. // Lower dbg.declare intrinsics, otherwise their value may be clobbered
  3848. // by instcombiner.
  3849. bool MadeIRChange = false;
  3850. if (ShouldLowerDbgDeclare)
  3851. MadeIRChange = LowerDbgDeclare(F);
  3852. // Iterate while there is work to do.
  3853. unsigned Iteration = 0;
  3854. while (true) {
  3855. ++NumWorklistIterations;
  3856. ++Iteration;
  3857. if (Iteration > InfiniteLoopDetectionThreshold) {
  3858. report_fatal_error(
  3859. "Instruction Combining seems stuck in an infinite loop after " +
  3860. Twine(InfiniteLoopDetectionThreshold) + " iterations.");
  3861. }
  3862. if (Iteration > MaxIterations) {
  3863. LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
  3864. << " on " << F.getName()
  3865. << " reached; stopping before reaching a fixpoint\n");
  3866. break;
  3867. }
  3868. LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
  3869. << F.getName() << "\n");
  3870. MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
  3871. InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
  3872. ORE, BFI, PSI, DL, LI);
  3873. IC.MaxArraySizeForCombine = MaxArraySize;
  3874. if (!IC.run())
  3875. break;
  3876. MadeIRChange = true;
  3877. }
  3878. return MadeIRChange;
  3879. }
  3880. InstCombinePass::InstCombinePass() : MaxIterations(LimitMaxIterations) {}
  3881. InstCombinePass::InstCombinePass(unsigned MaxIterations)
  3882. : MaxIterations(MaxIterations) {}
  3883. PreservedAnalyses InstCombinePass::run(Function &F,
  3884. FunctionAnalysisManager &AM) {
  3885. auto &AC = AM.getResult<AssumptionAnalysis>(F);
  3886. auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  3887. auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  3888. auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  3889. auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  3890. auto *LI = AM.getCachedResult<LoopAnalysis>(F);
  3891. auto *AA = &AM.getResult<AAManager>(F);
  3892. auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  3893. ProfileSummaryInfo *PSI =
  3894. MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  3895. auto *BFI = (PSI && PSI->hasProfileSummary()) ?
  3896. &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
  3897. if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
  3898. BFI, PSI, MaxIterations, LI))
  3899. // No changes, all analyses are preserved.
  3900. return PreservedAnalyses::all();
  3901. // Mark all the analyses that instcombine updates as preserved.
  3902. PreservedAnalyses PA;
  3903. PA.preserveSet<CFGAnalyses>();
  3904. return PA;
  3905. }
  3906. void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
  3907. AU.setPreservesCFG();
  3908. AU.addRequired<AAResultsWrapperPass>();
  3909. AU.addRequired<AssumptionCacheTracker>();
  3910. AU.addRequired<TargetLibraryInfoWrapperPass>();
  3911. AU.addRequired<TargetTransformInfoWrapperPass>();
  3912. AU.addRequired<DominatorTreeWrapperPass>();
  3913. AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  3914. AU.addPreserved<DominatorTreeWrapperPass>();
  3915. AU.addPreserved<AAResultsWrapperPass>();
  3916. AU.addPreserved<BasicAAWrapperPass>();
  3917. AU.addPreserved<GlobalsAAWrapperPass>();
  3918. AU.addRequired<ProfileSummaryInfoWrapperPass>();
  3919. LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
  3920. }
  3921. bool InstructionCombiningPass::runOnFunction(Function &F) {
  3922. if (skipFunction(F))
  3923. return false;
  3924. // Required analyses.
  3925. auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  3926. auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  3927. auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  3928. auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  3929. auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  3930. auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  3931. // Optional analyses.
  3932. auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
  3933. auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
  3934. ProfileSummaryInfo *PSI =
  3935. &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  3936. BlockFrequencyInfo *BFI =
  3937. (PSI && PSI->hasProfileSummary()) ?
  3938. &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
  3939. nullptr;
  3940. return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
  3941. BFI, PSI, MaxIterations, LI);
  3942. }
  3943. char InstructionCombiningPass::ID = 0;
  3944. InstructionCombiningPass::InstructionCombiningPass()
  3945. : FunctionPass(ID), MaxIterations(InstCombineDefaultMaxIterations) {
  3946. initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
  3947. }
  3948. InstructionCombiningPass::InstructionCombiningPass(unsigned MaxIterations)
  3949. : FunctionPass(ID), MaxIterations(MaxIterations) {
  3950. initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
  3951. }
  3952. INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
  3953. "Combine redundant instructions", false, false)
  3954. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  3955. INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
  3956. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  3957. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  3958. INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  3959. INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  3960. INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
  3961. INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
  3962. INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
  3963. INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
  3964. "Combine redundant instructions", false, false)
  3965. // Initialization Routines
  3966. void llvm::initializeInstCombine(PassRegistry &Registry) {
  3967. initializeInstructionCombiningPassPass(Registry);
  3968. }
  3969. void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
  3970. initializeInstructionCombiningPassPass(*unwrap(R));
  3971. }
  3972. FunctionPass *llvm::createInstructionCombiningPass() {
  3973. return new InstructionCombiningPass();
  3974. }
  3975. FunctionPass *llvm::createInstructionCombiningPass(unsigned MaxIterations) {
  3976. return new InstructionCombiningPass(MaxIterations);
  3977. }
  3978. void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
  3979. unwrap(PM)->add(createInstructionCombiningPass());
  3980. }