//===- InstCombineCalls.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall, visitInvoke, and visitCallBr functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumSimplified, "Number of library calls simplified");

static cl::opt<unsigned> GuardWideningWindow(
    "instcombine-guard-widening-window",
    cl::init(3),
    cl::desc("How wide an instruction window to bypass looking for "
             "another guard"));

namespace llvm {
/// enable preservation of attributes in assume like:
/// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
extern cl::opt<bool> EnableKnowledgeRetention;
} // namespace llvm

/// Return the specified type promoted as it would be to pass through a va_arg
/// area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}
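
// Illustrative note (not taken from the upstream source): under the rule
// above, getPromotedType(i8) and getPromotedType(i16) both yield i32, while
// i32, wider integers, and all non-integer types are returned unchanged --
// matching how narrow integers are promoted when passed through a va_arg area.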

/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
/// TODO: This should probably be integrated with visitAllocSites, but that
/// requires a deeper change to allow either unread or unwritten objects.
static bool hasUndefSource(AnyMemTransferInst *MI) {
  auto *Src = MI->getRawSource();
  while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) {
    if (!Src->hasOneUse())
      return false;
    Src = cast<Instruction>(Src)->getOperand(0);
  }
  return isa<AllocaInst>(Src) && Src->hasOneUse();
}

Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
  Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
  MaybeAlign CopyDstAlign = MI->getDestAlign();
  if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
    MI->setDestAlignment(DstAlign);
    return MI;
  }

  Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
  MaybeAlign CopySrcAlign = MI->getSourceAlign();
  if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
    MI->setSourceAlignment(SrcAlign);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // If the source is provably undef, the memcpy/memmove doesn't do anything
  // (unless the transfer is volatile).
  if (hasUndefSource(MI) && !MI->isVolatile()) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
  if (!MemOpLength)
    return nullptr;

  // Source and destination pointer types are always "i8*" for intrinsic. See
  // if the size is something we can handle with a single primitive load/store.
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already.");

  if (Size > 8 || (Size & (Size - 1)))
    return nullptr; // If not 1/2/4/8 bytes, exit.

  // If it is an atomic and alignment is less than the size then we will
  // introduce the unaligned memory access which will be later transformed
  // into libcall in CodeGen. This is not evident performance gain so disable
  // it now.
  if (isa<AtomicMemTransferInst>(MI))
    if (*CopyDstAlign < Size || *CopySrcAlign < Size)
      return nullptr;

  // Use an integer load+store unless we can find something better.
  unsigned SrcAddrSp =
      cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
      cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  IntegerType *IntType = IntegerType::get(MI->getContext(), Size << 3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);

  // If the memcpy has metadata describing the members, see if we can get the
  // TBAA tag describing our copy.
  MDNode *CopyMD = nullptr;
  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
    CopyMD = M;
  } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
    if (M->getNumOperands() == 3 && M->getOperand(0) &&
        mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
        mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
        M->getOperand(1) &&
        mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
        mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() == Size &&
        M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
      CopyMD = cast<MDNode>(M->getOperand(2));
  }

  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  LoadInst *L = Builder.CreateLoad(IntType, Src);
  // Alignment from the mem intrinsic will be better, so use it.
  L->setAlignment(*CopySrcAlign);
  if (CopyMD)
    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  MDNode *LoopMemParallelMD =
      MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  if (LoopMemParallelMD)
    L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
  MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
  if (AccessGroupMD)
    L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);

  StoreInst *S = Builder.CreateStore(L, Dest);
  // Alignment from the mem intrinsic will be better, so use it.
  S->setAlignment(*CopyDstAlign);
  if (CopyMD)
    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  if (LoopMemParallelMD)
    S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
  if (AccessGroupMD)
    S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
  S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);

  if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
    // non-atomics can be volatile
    L->setVolatile(MT->isVolatile());
    S->setVolatile(MT->isVolatile());
  }
  if (isa<AtomicMemTransferInst>(MI)) {
    // atomics have to be unordered
    L->setOrdering(AtomicOrdering::Unordered);
    S->setOrdering(AtomicOrdering::Unordered);
  }

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setLength(Constant::getNullValue(MemOpLength->getType()));
  return MI;
}
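
// Illustrative example (not taken from the upstream source): for a constant
// 8-byte, non-volatile transfer such as
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 8, i1 false)
// the code above emits an integer load/store pair along the lines of
//   %v = load i64, ptr %src
//   store i64 %v, ptr %dst
// (carrying over the intrinsic's alignment and TBAA/access-group metadata),
// then shrinks the memcpy length to 0 so the dead intrinsic is erased on a
// later iteration.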

Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
  const Align KnownAlignment =
      getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
  MaybeAlign MemSetAlign = MI->getDestAlign();
  if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
    MI->setDestAlignment(KnownAlignment);
    return MI;
  }

  // If we have a store to a location which is known constant, we can conclude
  // that the store must be storing the constant value (else the memory
  // wouldn't be constant), and this must be a noop.
  if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // Remove memset with an undef value.
  // FIXME: This is technically incorrect because it might overwrite a poison
  // value. Change to PoisonValue once #52930 is resolved.
  if (isa<UndefValue>(MI->getValue())) {
    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return nullptr;
  const uint64_t Len = LenC->getLimitedValue();
  assert(Len && "0-sized memory setting should be removed already.");
  const Align Alignment = MI->getDestAlign().valueOrOne();

  // If it is an atomic and alignment is less than the size then we will
  // introduce the unaligned memory access which will be later transformed
  // into libcall in CodeGen. This is not evident performance gain so disable
  // it now.
  if (isa<AtomicMemSetInst>(MI))
    if (Alignment < Len)
      return nullptr;

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len * 8); // n=1 -> i8.
    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);

    // Extract the fill value and store.
    const uint64_t Fill = FillC->getZExtValue() * 0x0101010101010101ULL;
    Constant *FillVal = ConstantInt::get(ITy, Fill);
    StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
    S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
    for (auto *DAI : at::getAssignmentMarkers(S)) {
      if (any_of(DAI->location_ops(), [&](Value *V) { return V == FillC; }))
        DAI->replaceVariableLocationOp(FillC, FillVal);
    }

    S->setAlignment(Alignment);
    if (isa<AtomicMemSetInst>(MI))
      S->setOrdering(AtomicOrdering::Unordered);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return nullptr;
}
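
// Illustrative example (not taken from the upstream source): a 4-byte memset
// with a constant fill byte,
//   call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 4, i1 false)
// becomes a single store of the byte replicated across the wider integer type:
//   store i32 16843009, ptr %p            ; 0x01010101
// The replication is the FillC->getZExtValue() * 0x0101010101010101ULL step
// above, truncated to the i32 store type.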

// TODO, Obvious Missing Transforms:
// * Narrow width by halves excluding zero/undef lanes
Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
  Value *LoadPtr = II.getArgOperand(0);
  const Align Alignment =
      cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();

  // If the mask is all ones or undefs, this is a plain vector load of the 1st
  // argument.
  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
    LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                            "unmaskedload");
    L->copyMetadata(II);
    return L;
  }

  // If we can unconditionally load from this address, replace with a
  // load/select idiom. TODO: use DT for context sensitive query
  if (isDereferenceablePointer(LoadPtr, II.getType(),
                               II.getModule()->getDataLayout(), &II, &AC)) {
    LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                             "unmaskedload");
    LI->copyMetadata(II);
    return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
  }

  return nullptr;
}
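
// Illustrative example (not taken from the upstream source): with an all-ones
// mask,
//   %v = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4,
//            <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %pt)
// the call is replaced by a plain aligned load,
//   %v = load <4 x i32>, ptr %p, align 4
// and when the pointer is merely known dereferenceable, by the same load
// followed by
//   select <4 x i1> %mask, <4 x i32> %loaded, <4 x i32> %pt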

// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Narrow width by halves excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, this instruction does nothing.
  if (ConstMask->isNullValue())
    return eraseInstFromFunction(II);

  // If the mask is all ones, this is a plain vector store of the 1st argument.
  if (ConstMask->isAllOnesValue()) {
    Value *StorePtr = II.getArgOperand(1);
    Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
    StoreInst *S =
        new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
    S->copyMetadata(II);
    return S;
  }

  if (isa<ScalableVectorType>(ConstMask->getType()))
    return nullptr;

  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
  APInt UndefElts(DemandedElts.getBitWidth(), 0);
  if (Value *V =
          SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
    return replaceOperand(II, 0, V);

  return nullptr;
}
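
// Illustrative example (not taken from the upstream source):
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %v, ptr %p, i32 4,
//                                         <4 x i1> zeroinitializer)
// is erased outright, while an all-ones mask turns the call into
//   store <4 x i32> %v, ptr %p, align 4
// For a mixed constant mask, only the demanded lanes of %v are kept live via
// SimplifyDemandedVectorElts.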

// TODO, Obvious Missing Transforms:
// * Single constant active lane load -> load
// * Dereferenceable address & few lanes -> scalarize speculative load/selects
// * Adjacent vector addresses -> masked.load
// * Narrow width by halves excluding zero/undef lanes
// * Vector incrementing address -> vector masked load
Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
  if (!ConstMask)
    return nullptr;

  // Vector splat address w/known mask -> scalar load
  // Fold the gather to load the source vector first lane
  // because it is reloading the same value each time
  if (ConstMask->isAllOnesValue())
    if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
      auto *VecTy = cast<VectorType>(II.getType());
      const Align Alignment =
          cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
      LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
                                              Alignment, "load.scalar");
      Value *Shuf =
          Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
      return replaceInstUsesWith(II, cast<Instruction>(Shuf));
    }

  return nullptr;
}
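
// Illustrative example (not taken from the upstream source): a gather whose
// pointer vector is a splat of one pointer %p and whose mask is all ones,
//   %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %splat.p,
//            i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>,
//            <4 x i32> poison)
// reloads the same scalar in every lane, so it is folded to one scalar load
// plus a vector splat:
//   %s = load i32, ptr %p, align 4          ; "load.scalar"
//   %g = ... broadcast %s into <4 x i32>    ; "broadcast"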

// TODO, Obvious Missing Transforms:
// * Single constant active lane -> store
// * Adjacent vector addresses -> masked.store
// * Narrow store width by halves excluding zero/undef lanes
// * Vector incrementing address -> vector masked store
Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
  if (!ConstMask)
    return nullptr;

  // If the mask is all zeros, a scatter does nothing.
  if (ConstMask->isNullValue())
    return eraseInstFromFunction(II);

  // Vector splat address -> scalar store
  if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
    // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
    if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
      Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
      StoreInst *S =
          new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, Alignment);
      S->copyMetadata(II);
      return S;
    }
    // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
    // lastlane), ptr
    if (ConstMask->isAllOnesValue()) {
      Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
      VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
      ElementCount VF = WideLoadTy->getElementCount();
      Constant *EC =
          ConstantInt::get(Builder.getInt32Ty(), VF.getKnownMinValue());
      Value *RunTimeVF = VF.isScalable() ? Builder.CreateVScale(EC) : EC;
      Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
      Value *Extract =
          Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
      StoreInst *S =
          new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
      S->copyMetadata(II);
      return S;
    }
  }
  if (isa<ScalableVectorType>(ConstMask->getType()))
    return nullptr;

  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
  APInt UndefElts(DemandedElts.getBitWidth(), 0);
  if (Value *V =
          SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
    return replaceOperand(II, 0, V);
  if (Value *V =
          SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts))
    return replaceOperand(II, 1, V);

  return nullptr;
}
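
// Illustrative example (not taken from the upstream source): scattering a
// splatted value through a splatted pointer with a non-zero mask,
//   call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %splat.v,
//            <4 x ptr> %splat.p, i32 4, <4 x i1> %nonzero.mask)
// writes the same value to the same address repeatedly, so it collapses to
//   store i32 %v, ptr %p, align 4
// When only the pointer is a splat and the mask is all ones, the last lane of
// the value vector is the one that sticks, hence the extractelement of lane
// VF-1 above.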

/// This function transforms launder.invariant.group and strip.invariant.group
/// like:
/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
/// launder(strip(%x)) -> launder(%x)
/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
/// strip(launder(%x)) -> strip(%x)
/// This is legal because it preserves the most recent information about
/// the presence or absence of invariant.group.
static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
                                                    InstCombinerImpl &IC) {
  auto *Arg = II.getArgOperand(0);
  auto *StrippedArg = Arg->stripPointerCasts();
  auto *StrippedInvariantGroupsArg = StrippedArg;
  while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
    if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
        Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
      break;
    StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
  }
  if (StrippedArg == StrippedInvariantGroupsArg)
    return nullptr; // No launders/strips to remove.

  Value *Result = nullptr;
  if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
    Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
  else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
    Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
  else
    llvm_unreachable(
        "simplifyInvariantGroupIntrinsic only handles launder and strip");
  if (Result->getType()->getPointerAddressSpace() !=
      II.getType()->getPointerAddressSpace())
    Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
  if (Result->getType() != II.getType())
    Result = IC.Builder.CreateBitCast(Result, II.getType());

  return cast<Instruction>(Result);
}
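
// Illustrative example (not taken from the upstream source): a nested pair
//   %a = call ptr @llvm.strip.invariant.group.p0(ptr %x)
//   %b = call ptr @llvm.launder.invariant.group.p0(ptr %a)
// is rebuilt as a single launder of the innermost pointer,
//   %b = call ptr @llvm.launder.invariant.group.p0(ptr %x)
// because only the outermost launder/strip carries the up-to-date
// invariant.group information.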

static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
  assert((II.getIntrinsicID() == Intrinsic::cttz ||
          II.getIntrinsicID() == Intrinsic::ctlz) &&
         "Expected cttz or ctlz intrinsic");
  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
  Value *Op0 = II.getArgOperand(0);
  Value *Op1 = II.getArgOperand(1);
  Value *X;
  // ctlz(bitreverse(x)) -> cttz(x)
  // cttz(bitreverse(x)) -> ctlz(x)
  if (match(Op0, m_BitReverse(m_Value(X)))) {
    Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
    Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
    return CallInst::Create(F, {X, II.getArgOperand(1)});
  }

  if (II.getType()->isIntOrIntVectorTy(1)) {
    // ctlz/cttz i1 Op0 --> not Op0
    if (match(Op1, m_Zero()))
      return BinaryOperator::CreateNot(Op0);
    // If zero is poison, then the input can be assumed to be "true", so the
    // instruction simplifies to "false".
    assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
    return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
  }

  // If the operand is a select with constant arm(s), try to hoist ctlz/cttz.
  if (auto *Sel = dyn_cast<SelectInst>(Op0))
    if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
      return R;

  if (IsTZ) {
    // cttz(-x) -> cttz(x)
    if (match(Op0, m_Neg(m_Value(X))))
      return IC.replaceOperand(II, 0, X);

    // cttz(sext(x)) -> cttz(zext(x))
    if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
      auto *Zext = IC.Builder.CreateZExt(X, II.getType());
      auto *CttzZext =
          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
      return IC.replaceInstUsesWith(II, CttzZext);
    }

    // Zext doesn't change the number of trailing zeros, so narrow:
    // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
    if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
      auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
                                                    IC.Builder.getTrue());
      auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
      return IC.replaceInstUsesWith(II, ZextCttz);
    }

    // cttz(abs(x)) -> cttz(x)
    // cttz(nabs(x)) -> cttz(x)
    Value *Y;
    SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
    if (SPF == SPF_ABS || SPF == SPF_NABS)
      return IC.replaceOperand(II, 0, X);

    if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
      return IC.replaceOperand(II, 0, X);
  }

  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);

  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
                                : Known.countMaxLeadingZeros();
  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
                                : Known.countMinLeadingZeros();

  // If all bits above (ctlz) or below (cttz) the first known one are known
  // zero, this value is constant.
  // FIXME: This should be in InstSimplify because we're replacing an
  // instruction with a constant.
  if (PossibleZeros == DefiniteZeros) {
    auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
    return IC.replaceInstUsesWith(II, C);
  }

  // If the input to cttz/ctlz is known to be non-zero,
  // then change the 'ZeroIsPoison' parameter to 'true'
  // because we know the zero behavior can't affect the result.
  if (!Known.One.isZero() ||
      isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
                     &IC.getDominatorTree())) {
    if (!match(II.getArgOperand(1), m_One()))
      return IC.replaceOperand(II, 1, IC.Builder.getTrue());
  }

  // Add range metadata since known bits can't completely reflect what we know.
  // TODO: Handle splat vectors.
  auto *IT = dyn_cast<IntegerType>(Op0->getType());
  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
    Metadata *LowAndHigh[] = {
        ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
        ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
  }

  return nullptr;
}
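
// Illustrative example (not taken from the upstream source): if
// computeKnownBits proves only that the low three bits of %x are zero, cttz
// returns at least 3 but possibly up to 32 (for a zero input), so the code
// above records that as range metadata on the call:
//   %c = call i32 @llvm.cttz.i32(i32 %x, i1 false), !range !0
//   !0 = !{i32 3, i32 33}
// i.e. the half-open interval [DefiniteZeros, PossibleZeros + 1).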

static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
         "Expected ctpop intrinsic");
  Type *Ty = II.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  Value *Op0 = II.getArgOperand(0);
  Value *X, *Y;

  // ctpop(bitreverse(x)) -> ctpop(x)
  // ctpop(bswap(x)) -> ctpop(x)
  if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
    return IC.replaceOperand(II, 0, X);

  // ctpop(rot(x)) -> ctpop(x)
  if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
       match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
      X == Y)
    return IC.replaceOperand(II, 0, X);

  // ctpop(x | -x) -> bitwidth - cttz(x, false)
  if (Op0->hasOneUse() &&
      match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
    Function *F =
        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
    auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
    auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
    return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
  }

  // ctpop(~x & (x - 1)) -> cttz(x, false)
  if (match(Op0,
            m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
    Function *F =
        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
    return CallInst::Create(F, {X, IC.Builder.getFalse()});
  }

  // Zext doesn't change the number of set bits, so narrow:
  // ctpop (zext X) --> zext (ctpop X)
  if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
    Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
    return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
  }

  // If the operand is a select with constant arm(s), try to hoist ctpop.
  if (auto *Sel = dyn_cast<SelectInst>(Op0))
    if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
      return R;

  KnownBits Known(BitWidth);
  IC.computeKnownBits(Op0, Known, 0, &II);

  // If all bits are zero except for exactly one fixed bit, then the result
  // must be 0 or 1, and we can get that answer by shifting to LSB:
  // ctpop (X & 32) --> (X & 32) >> 5
  // TODO: Investigate removing this as it's likely unnecessary given the below
  // `isKnownToBeAPowerOfTwo` check.
  if ((~Known.Zero).isPowerOf2())
    return BinaryOperator::CreateLShr(
        Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));

  // More generally we can also handle non-constant power of 2 patterns such as
  // shl/shr(Pow2, X), (X & -X), etc... by transforming:
  // ctpop(Pow2OrZero) --> icmp ne X, 0
  if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
    return CastInst::Create(Instruction::ZExt,
                            IC.Builder.CreateICmp(ICmpInst::ICMP_NE, Op0,
                                                  Constant::getNullValue(Ty)),
                            Ty);

  // FIXME: Try to simplify vectors of integers.
  auto *IT = dyn_cast<IntegerType>(Ty);
  if (!IT)
    return nullptr;

  // Add range metadata since known bits can't completely reflect what we know.
  unsigned MinCount = Known.countMinPopulation();
  unsigned MaxCount = Known.countMaxPopulation();
  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
    Metadata *LowAndHigh[] = {
        ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
        ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
    II.setMetadata(LLVMContext::MD_range,
                   MDNode::get(II.getContext(), LowAndHigh));
    return &II;
  }

  return nullptr;
}
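
// Illustrative example (not taken from the upstream source): the single-bit
// case above rewrites
//   %m = and i32 %x, 32
//   %c = call i32 @llvm.ctpop.i32(i32 %m)
// into a shift that isolates the only possibly-set bit:
//   %c = lshr i32 %m, 5
// and, more generally, ctpop of any power-of-two-or-zero value %p becomes
//   %ne = icmp ne i32 %p, 0
//   %c  = zext i1 %ne to i32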

/// Convert a table lookup to shufflevector if the mask is constant.
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
/// which case we could lower the shufflevector with rev64 instructions
/// as it's actually a byte reverse.
static Value *simplifyNeonTbl1(const IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder) {
  // Bail out if the mask is not a constant.
  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
  if (!C)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();

  // Only perform this transformation for <8 x i8> vector types.
  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
    return nullptr;

  int Indexes[8];

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = C->getAggregateElement(I);

    if (!COp || !isa<ConstantInt>(COp))
      return nullptr;

    Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();

    // Make sure the mask indices are in range.
    if ((unsigned)Indexes[I] >= NumElts)
      return nullptr;
  }

  auto *V1 = II.getArgOperand(0);
  auto *V2 = Constant::getNullValue(V1->getType());
  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
}
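
// Illustrative example (not taken from the upstream source): a NEON tbl1 call
// with the constant byte-reversal mask,
//   %r = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<8 x i8> %v,
//            <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
// becomes a target-independent shuffle,
//   %r = shufflevector <8 x i8> %v, <8 x i8> zeroinitializer,
//            <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// which the backend can then lower to a rev64, as the doc comment above notes.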

// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
                             unsigned NumOperands) {
  assert(I.arg_size() >= NumOperands && "Not enough operands");
  assert(E.arg_size() >= NumOperands && "Not enough operands");
  for (unsigned i = 0; i < NumOperands; i++)
    if (I.getArgOperand(i) != E.getArgOperand(i))
      return false;
  return true;
}

// Remove trivially empty start/end intrinsic ranges, i.e. a start
// immediately followed by an end (ignoring debuginfo or other
// start/end intrinsics in between). As this handles only the most trivial
// cases, tracking the nesting level is not needed:
//
//   call @llvm.foo.start(i1 0)
//   call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
//   call @llvm.foo.end(i1 0)
//   call @llvm.foo.end(i1 0) ; &I
static bool
removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
                          std::function<bool(const IntrinsicInst &)> IsStart) {
  // We start from the end intrinsic and scan backwards, so that InstCombine
  // has already processed (and potentially removed) all the instructions
  // before the end intrinsic.
  BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
  for (; BI != BE; ++BI) {
    if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
      if (I->isDebugOrPseudoInst() ||
          I->getIntrinsicID() == EndI.getIntrinsicID())
        continue;
      if (IsStart(*I)) {
        if (haveSameOperands(EndI, *I, EndI.arg_size())) {
          IC.eraseInstFromFunction(*I);
          IC.eraseInstFromFunction(EndI);
          return true;
        }
        // Skip start intrinsics that don't pair with this end intrinsic.
        continue;
      }
    }
    break;
  }
  return false;
}

Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
  removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) {
    return I.getIntrinsicID() == Intrinsic::vastart ||
           I.getIntrinsicID() == Intrinsic::vacopy;
  });
  return nullptr;
}

static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
  assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
  Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
  if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
    Call.setArgOperand(0, Arg1);
    Call.setArgOperand(1, Arg0);
    return &Call;
  }
  return nullptr;
}

/// Creates a result tuple for an overflow intrinsic \p II with a given
/// \p Result and a constant \p Overflow value.
static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
                                        Constant *Overflow) {
  Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
  StructType *ST = cast<StructType>(II->getType());
  Constant *Struct = ConstantStruct::get(ST, V);
  return InsertValueInst::Create(Struct, Result, 0);
}
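
// Illustrative example (not taken from the upstream source): for
// llvm.sadd.with.overflow.i32, whose result type is { i32, i1 },
// createOverflowTuple builds the aggregate
//   insertvalue { i32, i1 } { i32 poison, i1 false }, i32 %sum, 0
// i.e. a constant struct carrying the known overflow bit, with the computed
// arithmetic result inserted into field 0.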

Instruction *
InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
  WithOverflowInst *WO = cast<WithOverflowInst>(II);
  Value *OperationResult = nullptr;
  Constant *OverflowResult = nullptr;
  if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
                            WO->getRHS(), *WO, OperationResult, OverflowResult))
    return createOverflowTuple(WO, OperationResult, OverflowResult);
  return nullptr;
}

static std::optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
                                        const DataLayout &DL,
                                        AssumptionCache *AC,
                                        DominatorTree *DT) {
  KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
  if (Known.isNonNegative())
    return false;
  if (Known.isNegative())
    return true;

  Value *X, *Y;
  if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
    return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, CxtI, DL);

  return isImpliedByDomCondition(
      ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}

/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
/// can trigger other combines.
static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
                                       InstCombiner::BuilderTy &Builder) {
  Intrinsic::ID MinMaxID = II->getIntrinsicID();
  assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
          MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
         "Expected a min or max intrinsic");

  // TODO: Match vectors with undef elements, but undef may not propagate.
  Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
  Value *X;
  const APInt *C0, *C1;
  if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
      !match(Op1, m_APInt(C1)))
    return nullptr;

  // Check for necessary no-wrap and overflow constraints.
  bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
  auto *Add = cast<BinaryOperator>(Op0);
  if ((IsSigned && !Add->hasNoSignedWrap()) ||
      (!IsSigned && !Add->hasNoUnsignedWrap()))
    return nullptr;

  // If the constant difference overflows, then instsimplify should reduce the
  // min/max to the add or C1.
  bool Overflow;
  APInt CDiff =
      IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
  assert(!Overflow && "Expected simplify of min/max");

  // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
  // Note: the "mismatched" no-overflow setting does not propagate.
  Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
  Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
  return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
                  : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
}
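
// Illustrative example (not taken from the upstream source): with a no-wrap
// add feeding a signed max,
//   %a = add nsw i32 %x, 8
//   %m = call i32 @llvm.smax.i32(i32 %a, i32 20)
// the constant is folded through the add (C1 - C0 = 12), giving
//   %m0 = call i32 @llvm.smax.i32(i32 %x, i32 12)
//   %m  = add nsw i32 %m0, 8
// which exposes %x directly to further min/max combines.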

/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
  Type *Ty = MinMax1.getType();

  // We are looking for a tree of:
  // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
  // Where the min and max could be reversed
  Instruction *MinMax2;
  BinaryOperator *AddSub;
  const APInt *MinValue, *MaxValue;
  if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
    if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
      return nullptr;
  } else if (match(&MinMax1,
                   m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
    if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
      return nullptr;
  } else
    return nullptr;

  // Check that the constants clamp a saturate, and that the new type would be
  // sensible to convert to.
  if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
    return nullptr;
  // In what bitwidth can this be treated as saturating arithmetic?
  unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
  // FIXME: This isn't quite right for vectors, but using the scalar type is a
  // good first approximation for what should be done there.
  if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
    return nullptr;

  // Also make sure that the inner min/max and the add/sub have one use.
  if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
    return nullptr;

  // Create the new type (which can be a vector type)
  Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);

  Intrinsic::ID IntrinsicID;
  if (AddSub->getOpcode() == Instruction::Add)
    IntrinsicID = Intrinsic::sadd_sat;
  else if (AddSub->getOpcode() == Instruction::Sub)
    IntrinsicID = Intrinsic::ssub_sat;
  else
    return nullptr;

  // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
  // is usually achieved via a sext from a smaller type.
  if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) >
          NewBitWidth ||
      ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
    return nullptr;

  // Finally create and return the sat intrinsic, truncated to the new type
  Function *F =
      Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
  Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
  Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
  Value *Sat = Builder.CreateCall(F, {AT, BT});
  return CastInst::Create(Instruction::SExt, Sat, Ty);
}
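
// Illustrative example (not taken from the upstream source): a 32-bit add of
// two sign-extended i8 values clamped to [-128, 127],
//   %sx = sext i8 %x to i32
//   %sy = sext i8 %y to i32
//   %a  = add nsw i32 %sx, %sy
//   %lo = call i32 @llvm.smax.i32(i32 %a, i32 -128)
//   %r  = call i32 @llvm.smin.i32(i32 %lo, i32 127)
// is recognized as 8-bit saturating addition and becomes
//   %s = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y)
//   %r = sext i8 %s to i32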
  835. /// If we have a clamp pattern like max (min X, 42), 41 -- where the output
  836. /// can only be one of two possible constant values -- turn that into a select
  837. /// of constants.
  838. static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
  839. InstCombiner::BuilderTy &Builder) {
  840. Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
  841. Value *X;
  842. const APInt *C0, *C1;
  843. if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
  844. return nullptr;
  845. CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
  846. switch (II->getIntrinsicID()) {
  847. case Intrinsic::smax:
  848. if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
  849. Pred = ICmpInst::ICMP_SGT;
  850. break;
  851. case Intrinsic::smin:
  852. if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
  853. Pred = ICmpInst::ICMP_SLT;
  854. break;
  855. case Intrinsic::umax:
  856. if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
  857. Pred = ICmpInst::ICMP_UGT;
  858. break;
  859. case Intrinsic::umin:
  860. if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
  861. Pred = ICmpInst::ICMP_ULT;
  862. break;
  863. default:
  864. llvm_unreachable("Expected min/max intrinsic");
  865. }
  866. if (Pred == CmpInst::BAD_ICMP_PREDICATE)
  867. return nullptr;
  868. // max (min X, 42), 41 --> X > 41 ? 42 : 41
  869. // min (max X, 42), 43 --> X < 43 ? 42 : 43
  870. Value *Cmp = Builder.CreateICmp(Pred, X, I1);
  871. return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
  872. }
  873. /// If this min/max has a constant operand and an operand that is a matching
  874. /// min/max with a constant operand, constant-fold the 2 constant operands.
  875. static Instruction *reassociateMinMaxWithConstants(IntrinsicInst *II) {
  876. Intrinsic::ID MinMaxID = II->getIntrinsicID();
  877. auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
  878. if (!LHS || LHS->getIntrinsicID() != MinMaxID)
  879. return nullptr;
  880. Constant *C0, *C1;
  881. if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
  882. !match(II->getArgOperand(1), m_ImmConstant(C1)))
  883. return nullptr;
  884. // max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
  885. ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
  886. Constant *CondC = ConstantExpr::getICmp(Pred, C0, C1);
  887. Constant *NewC = ConstantExpr::getSelect(CondC, C0, C1);
  888. Module *Mod = II->getModule();
  889. Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
  890. return CallInst::Create(MinMax, {LHS->getArgOperand(0), NewC});
  891. }
  892. /// If this min/max has a matching min/max operand with a constant, try to push
  893. /// the constant operand into this instruction. This can enable more folds.
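/// For example (illustrative): smax (smax X, 42), Y --> smax (smax X, Y), 42,
/// which exposes the constant to the reassociation fold above.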
  894. static Instruction *
  895. reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
  896. InstCombiner::BuilderTy &Builder) {
  897. // Match and capture a min/max operand candidate.
  898. Value *X, *Y;
  899. Constant *C;
  900. Instruction *Inner;
  901. if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
  902. m_Instruction(Inner),
  903. m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
  904. m_Value(Y))))
  905. return nullptr;
  906. // The inner op must match. Check for constants to avoid infinite loops.
  907. Intrinsic::ID MinMaxID = II->getIntrinsicID();
  908. auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
  909. if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
  910. match(X, m_ImmConstant()) || match(Y, m_ImmConstant()))
  911. return nullptr;
  912. // max (max X, C), Y --> max (max X, Y), C
  913. Function *MinMax =
  914. Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
  915. Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
  916. NewInner->takeName(Inner);
  917. return CallInst::Create(MinMax, {NewInner, C});
  918. }
  919. /// Reduce a sequence of min/max intrinsics with a common operand.
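/// For example (illustrative, with a one-use inner op):
///   umin (umin A, B), (umin C, A) --> umin (umin C, A), B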
  920. static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
  921. // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
  922. auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
  923. auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
  924. Intrinsic::ID MinMaxID = II->getIntrinsicID();
  925. if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
  926. RHS->getIntrinsicID() != MinMaxID ||
  927. (!LHS->hasOneUse() && !RHS->hasOneUse()))
  928. return nullptr;
  929. Value *A = LHS->getArgOperand(0);
  930. Value *B = LHS->getArgOperand(1);
  931. Value *C = RHS->getArgOperand(0);
  932. Value *D = RHS->getArgOperand(1);
  933. // Look for a common operand.
  934. Value *MinMaxOp = nullptr;
  935. Value *ThirdOp = nullptr;
  936. if (LHS->hasOneUse()) {
  937. // If the LHS is only used in this chain and the RHS is used outside of it,
  938. // reuse the RHS min/max because that will eliminate the LHS.
  939. if (D == A || C == A) {
  940. // min(min(a, b), min(c, a)) --> min(min(c, a), b)
  941. // min(min(a, b), min(a, d)) --> min(min(a, d), b)
  942. MinMaxOp = RHS;
  943. ThirdOp = B;
  944. } else if (D == B || C == B) {
  945. // min(min(a, b), min(c, b)) --> min(min(c, b), a)
  946. // min(min(a, b), min(b, d)) --> min(min(b, d), a)
  947. MinMaxOp = RHS;
  948. ThirdOp = A;
  949. }
  950. } else {
  951. assert(RHS->hasOneUse() && "Expected one-use operand");
  952. // Reuse the LHS. This will eliminate the RHS.
  953. if (D == A || D == B) {
  954. // min(min(a, b), min(c, a)) --> min(min(a, b), c)
  955. // min(min(a, b), min(c, b)) --> min(min(a, b), c)
  956. MinMaxOp = LHS;
  957. ThirdOp = C;
    } else if (C == A || C == B) {
      // min(min(a, b), min(a, d)) --> min(min(a, b), d)
      // min(min(a, b), min(b, d)) --> min(min(a, b), d)
      MinMaxOp = LHS;
      ThirdOp = D;
    }
  964. }
  965. if (!MinMaxOp || !ThirdOp)
  966. return nullptr;
  967. Module *Mod = II->getModule();
  968. Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
  969. return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
  970. }
  971. /// If all arguments of the intrinsic are unary shuffles with the same mask,
  972. /// try to shuffle after the intrinsic.
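/// For example (illustrative):
///   smax (shuffle X, undef, M), (shuffle Y, undef, M)
///     --> shuffle (smax X, Y), M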
  973. static Instruction *
  974. foldShuffledIntrinsicOperands(IntrinsicInst *II,
  975. InstCombiner::BuilderTy &Builder) {
  976. // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
  977. // etc. Use llvm::isTriviallyVectorizable() and related to determine
  978. // which intrinsics are safe to shuffle?
  979. switch (II->getIntrinsicID()) {
  980. case Intrinsic::smax:
  981. case Intrinsic::smin:
  982. case Intrinsic::umax:
  983. case Intrinsic::umin:
  984. case Intrinsic::fma:
  985. case Intrinsic::fshl:
  986. case Intrinsic::fshr:
  987. break;
  988. default:
  989. return nullptr;
  990. }
  991. Value *X;
  992. ArrayRef<int> Mask;
  993. if (!match(II->getArgOperand(0),
  994. m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
  995. return nullptr;
  996. // At least 1 operand must have 1 use because we are creating 2 instructions.
  997. if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
  998. return nullptr;
  999. // See if all arguments are shuffled with the same mask.
  1000. SmallVector<Value *, 4> NewArgs(II->arg_size());
  1001. NewArgs[0] = X;
  1002. Type *SrcTy = X->getType();
  1003. for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
  1004. if (!match(II->getArgOperand(i),
  1005. m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
  1006. X->getType() != SrcTy)
  1007. return nullptr;
  1008. NewArgs[i] = X;
  1009. }
  1010. // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
  1011. Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
  1012. Value *NewIntrinsic =
  1013. Builder.CreateIntrinsic(II->getIntrinsicID(), SrcTy, NewArgs, FPI);
  1014. return new ShuffleVectorInst(NewIntrinsic, Mask);
  1015. }
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
  // Don't try to simplify calls without uses. It will not do anything useful,
  // but will result in the following folds being skipped.
  if (!CI.use_empty())
    if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI)))
      return replaceInstUsesWith(CI, V);
  if (Value *FreedOp = getFreedOperand(&CI, &TLI))
    return visitFree(CI, FreedOp);
  // If the caller function (i.e. us, the function that contains this CallInst)
  // is nounwind, mark the call as nounwind, even if the callee isn't.
  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallBase(CI);
  // For atomic unordered mem intrinsics, if the length is not positive or not
  // a multiple of the element size, then the behavior is undefined.
  1037. if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(II))
  1038. if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(AMI->getLength()))
  1039. if (NumBytes->getSExtValue() < 0 ||
  1040. (NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != 0)) {
  1041. CreateNonTerminatorUnreachable(AMI);
  1042. assert(AMI->getType()->isVoidTy() &&
  1043. "non void atomic unordered mem intrinsic");
  1044. return eraseInstFromFunction(*AMI);
  1045. }
  1046. // Intrinsics cannot occur in an invoke or a callbr, so handle them here
  1047. // instead of in visitCallBase.
  1048. if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
  1049. bool Changed = false;
  1050. // memmove/cpy/set of zero bytes is a noop.
  1051. if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
  1052. if (NumBytes->isNullValue())
  1053. return eraseInstFromFunction(CI);
  1054. }
  1055. // No other transformations apply to volatile transfers.
  1056. if (auto *M = dyn_cast<MemIntrinsic>(MI))
  1057. if (M->isVolatile())
  1058. return nullptr;
    // If we have a memmove and the source operand is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
  1062. if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
  1063. if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
  1064. if (GVSrc->isConstant()) {
  1065. Module *M = CI.getModule();
  1066. Intrinsic::ID MemCpyID =
  1067. isa<AtomicMemMoveInst>(MMI)
  1068. ? Intrinsic::memcpy_element_unordered_atomic
  1069. : Intrinsic::memcpy;
  1070. Type *Tys[3] = { CI.getArgOperand(0)->getType(),
  1071. CI.getArgOperand(1)->getType(),
  1072. CI.getArgOperand(2)->getType() };
  1073. CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
  1074. Changed = true;
  1075. }
  1076. }
  1077. if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
  1078. // memmove(x,x,size) -> noop.
  1079. if (MTI->getSource() == MTI->getDest())
  1080. return eraseInstFromFunction(CI);
  1081. }
  1082. // If we can determine a pointer alignment that is bigger than currently
  1083. // set, update the alignment.
  1084. if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
  1085. if (Instruction *I = SimplifyAnyMemTransfer(MTI))
  1086. return I;
  1087. } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
  1088. if (Instruction *I = SimplifyAnyMemSet(MSI))
  1089. return I;
  1090. }
  1091. if (Changed) return II;
  1092. }
  1093. // For fixed width vector result intrinsics, use the generic demanded vector
  1094. // support.
  1095. if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
  1096. auto VWidth = IIFVTy->getNumElements();
  1097. APInt UndefElts(VWidth, 0);
  1098. APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
  1099. if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
  1100. if (V != II)
  1101. return replaceInstUsesWith(*II, V);
  1102. return II;
  1103. }
  1104. }
  1105. if (II->isCommutative()) {
  1106. if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
  1107. return NewCall;
  1108. }
  // Unused constrained FP intrinsic calls may have a declared side effect,
  // which prevents them from being removed. In some cases, however, the side
  // effect is actually absent. To detect this case, call
  // simplifyConstrainedFPCall. If it returns a replacement, the call may be
  // removed.
  1113. if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
  1114. if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
  1115. return eraseInstFromFunction(CI);
  1116. }
  1117. Intrinsic::ID IID = II->getIntrinsicID();
  1118. switch (IID) {
  1119. case Intrinsic::objectsize:
  1120. if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false))
  1121. return replaceInstUsesWith(CI, V);
  1122. return nullptr;
  1123. case Intrinsic::abs: {
  1124. Value *IIOperand = II->getArgOperand(0);
  1125. bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
  1126. // abs(-x) -> abs(x)
  1127. // TODO: Copy nsw if it was present on the neg?
  1128. Value *X;
  1129. if (match(IIOperand, m_Neg(m_Value(X))))
  1130. return replaceOperand(*II, 0, X);
  1131. if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X)))))
  1132. return replaceOperand(*II, 0, X);
  1133. if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
  1134. return replaceOperand(*II, 0, X);
  1135. if (std::optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) {
  1136. // abs(x) -> x if x >= 0
  1137. if (!*Sign)
  1138. return replaceInstUsesWith(*II, IIOperand);
  1139. // abs(x) -> -x if x < 0
  1140. if (IntMinIsPoison)
  1141. return BinaryOperator::CreateNSWNeg(IIOperand);
  1142. return BinaryOperator::CreateNeg(IIOperand);
  1143. }
  1144. // abs (sext X) --> zext (abs X*)
  1145. // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
  1146. if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
  1147. Value *NarrowAbs =
  1148. Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
  1149. return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
  1150. }
  1151. // Match a complicated way to check if a number is odd/even:
  1152. // abs (srem X, 2) --> and X, 1
  1153. const APInt *C;
  1154. if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
  1155. return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
  1156. break;
  1157. }
  1158. case Intrinsic::umin: {
  1159. Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
  1160. // umin(x, 1) == zext(x != 0)
  1161. if (match(I1, m_One())) {
  1162. assert(II->getType()->getScalarSizeInBits() != 1 &&
  1163. "Expected simplify of umin with max constant");
  1164. Value *Zero = Constant::getNullValue(I0->getType());
  1165. Value *Cmp = Builder.CreateICmpNE(I0, Zero);
  1166. return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
  1167. }
  1168. [[fallthrough]];
  1169. }
  1170. case Intrinsic::umax: {
  1171. Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
  1172. Value *X, *Y;
  1173. if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
  1174. (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
  1175. Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
  1176. return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
  1177. }
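    // Also try to narrow when only one operand is extended, e.g. (illustrative)
    // umax (zext i8 %x to i32), 200 --> zext (umax i8 %x, 200) to i32; this is
    // only valid because the constant round-trips through the narrow type.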
  1178. Constant *C;
  1179. if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
  1180. I0->hasOneUse()) {
  1181. Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
  1182. if (ConstantExpr::getZExt(NarrowC, II->getType()) == C) {
  1183. Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
  1184. return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
  1185. }
  1186. }
  1187. // If both operands of unsigned min/max are sign-extended, it is still ok
  1188. // to narrow the operation.
  1189. [[fallthrough]];
  1190. }
  1191. case Intrinsic::smax:
  1192. case Intrinsic::smin: {
  1193. Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
  1194. Value *X, *Y;
  1195. if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
  1196. (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
  1197. Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
  1198. return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
  1199. }
  1200. Constant *C;
  1201. if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
  1202. I0->hasOneUse()) {
  1203. Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
  1204. if (ConstantExpr::getSExt(NarrowC, II->getType()) == C) {
  1205. Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
  1206. return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
  1207. }
  1208. }
  1209. if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
  1210. // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
  1211. // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
  1212. // TODO: Canonicalize neg after min/max if I1 is constant.
  1213. if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
  1214. (I0->hasOneUse() || I1->hasOneUse())) {
  1215. Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
  1216. Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
  1217. return BinaryOperator::CreateNSWNeg(InvMaxMin);
  1218. }
  1219. }
  1220. // If we can eliminate ~A and Y is free to invert:
  1221. // max ~A, Y --> ~(min A, ~Y)
  1222. //
  1223. // Examples:
  1224. // max ~A, ~Y --> ~(min A, Y)
  1225. // max ~A, C --> ~(min A, ~C)
  1226. // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
  1227. auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
  1228. Value *A;
  1229. if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
  1230. !isFreeToInvert(A, A->hasOneUse()) &&
  1231. isFreeToInvert(Y, Y->hasOneUse())) {
  1232. Value *NotY = Builder.CreateNot(Y);
  1233. Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
  1234. Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
  1235. return BinaryOperator::CreateNot(InvMaxMin);
  1236. }
  1237. return nullptr;
  1238. };
  1239. if (Instruction *I = moveNotAfterMinMax(I0, I1))
  1240. return I;
  1241. if (Instruction *I = moveNotAfterMinMax(I1, I0))
  1242. return I;
  1243. if (Instruction *I = moveAddAfterMinMax(II, Builder))
  1244. return I;
  1245. // smax(X, -X) --> abs(X)
  1246. // smin(X, -X) --> -abs(X)
  1247. // umax(X, -X) --> -abs(X)
  1248. // umin(X, -X) --> abs(X)
  1249. if (isKnownNegation(I0, I1)) {
  1250. // We can choose either operand as the input to abs(), but if we can
  1251. // eliminate the only use of a value, that's better for subsequent
  1252. // transforms/analysis.
  1253. if (I0->hasOneUse() && !I1->hasOneUse())
  1254. std::swap(I0, I1);
  1255. // This is some variant of abs(). See if we can propagate 'nsw' to the abs
  1256. // operation and potentially its negation.
  1257. bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
  1258. Value *Abs = Builder.CreateBinaryIntrinsic(
  1259. Intrinsic::abs, I0,
  1260. ConstantInt::getBool(II->getContext(), IntMinIsPoison));
  1261. // We don't have a "nabs" intrinsic, so negate if needed based on the
  1262. // max/min operation.
  1263. if (IID == Intrinsic::smin || IID == Intrinsic::umax)
  1264. Abs = Builder.CreateNeg(Abs, "nabs", /* NUW */ false, IntMinIsPoison);
  1265. return replaceInstUsesWith(CI, Abs);
  1266. }
  1267. if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
  1268. return Sel;
  1269. if (Instruction *SAdd = matchSAddSubSat(*II))
  1270. return SAdd;
  1271. if (match(I1, m_ImmConstant()))
  1272. if (auto *Sel = dyn_cast<SelectInst>(I0))
  1273. if (Instruction *R = FoldOpIntoSelect(*II, Sel))
  1274. return R;
  1275. if (Instruction *NewMinMax = reassociateMinMaxWithConstants(II))
  1276. return NewMinMax;
  1277. if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
  1278. return R;
  1279. if (Instruction *NewMinMax = factorizeMinMaxTree(II))
  1280. return NewMinMax;
  1281. break;
  1282. }
  1283. case Intrinsic::bitreverse: {
  1284. // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
  1285. Value *X;
  1286. if (match(II->getArgOperand(0), m_ZExt(m_Value(X))) &&
  1287. X->getType()->isIntOrIntVectorTy(1)) {
  1288. Type *Ty = II->getType();
  1289. APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
  1290. return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
  1291. ConstantInt::getNullValue(Ty));
  1292. }
  1293. break;
  1294. }
  1295. case Intrinsic::bswap: {
  1296. Value *IIOperand = II->getArgOperand(0);
  1297. // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
  1298. // inverse-shift-of-bswap:
  1299. // bswap (shl X, Y) --> lshr (bswap X), Y
  1300. // bswap (lshr X, Y) --> shl (bswap X), Y
  1301. Value *X, *Y;
  1302. if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
  1303. // The transform allows undef vector elements, so try a constant match
  1304. // first. If knownbits can handle that case, that clause could be removed.
  1305. unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
  1306. const APInt *C;
  1307. if ((match(Y, m_APIntAllowUndef(C)) && (*C & 7) == 0) ||
  1308. MaskedValueIsZero(Y, APInt::getLowBitsSet(BitWidth, 3))) {
  1309. Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
  1310. BinaryOperator::BinaryOps InverseShift =
  1311. cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
  1312. ? Instruction::LShr
  1313. : Instruction::Shl;
  1314. return BinaryOperator::Create(InverseShift, NewSwap, Y);
  1315. }
  1316. }
  1317. KnownBits Known = computeKnownBits(IIOperand, 0, II);
  1318. uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
  1319. uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
  1320. unsigned BW = Known.getBitWidth();
  1321. // bswap(x) -> shift(x) if x has exactly one "active byte"
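    // For example (illustrative, i32): if only bits 8..15 of x can be nonzero,
    // bswap moves that byte to bits 16..23, which is just a left shift by 8
    // (LZ = 16, TZ = 8).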
  1322. if (BW - LZ - TZ == 8) {
  1323. assert(LZ != TZ && "active byte cannot be in the middle");
  1324. if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
  1325. return BinaryOperator::CreateNUWShl(
  1326. IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
  1327. // -> lshr(x) if the "active byte" is in the high part of x
  1328. return BinaryOperator::CreateExactLShr(
  1329. IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
  1330. }
  1331. // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
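    // For example (illustrative): for i64 %x truncated to i32, the low 4 bytes
    // of bswap(%x) are the high 4 bytes of %x, so re-swapping the truncated
    // value is the same as trunc(lshr(%x, 32)).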
  1332. if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
  1333. unsigned C = X->getType()->getScalarSizeInBits() - BW;
  1334. Value *CV = ConstantInt::get(X->getType(), C);
  1335. Value *V = Builder.CreateLShr(X, CV);
  1336. return new TruncInst(V, IIOperand->getType());
  1337. }
  1338. break;
  1339. }
  1340. case Intrinsic::masked_load:
  1341. if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
  1342. return replaceInstUsesWith(CI, SimplifiedMaskedOp);
  1343. break;
  1344. case Intrinsic::masked_store:
  1345. return simplifyMaskedStore(*II);
  1346. case Intrinsic::masked_gather:
  1347. return simplifyMaskedGather(*II);
  1348. case Intrinsic::masked_scatter:
  1349. return simplifyMaskedScatter(*II);
  1350. case Intrinsic::launder_invariant_group:
  1351. case Intrinsic::strip_invariant_group:
  1352. if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
  1353. return replaceInstUsesWith(*II, SkippedBarrier);
  1354. break;
  1355. case Intrinsic::powi:
  1356. if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
  1357. // 0 and 1 are handled in instsimplify
  1358. // powi(x, -1) -> 1/x
  1359. if (Power->isMinusOne())
  1360. return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
  1361. II->getArgOperand(0), II);
  1362. // powi(x, 2) -> x*x
  1363. if (Power->equalsInt(2))
  1364. return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
  1365. II->getArgOperand(0), II);
  1366. if (!Power->getValue()[0]) {
  1367. Value *X;
  1368. // If power is even:
  1369. // powi(-x, p) -> powi(x, p)
  1370. // powi(fabs(x), p) -> powi(x, p)
  1371. // powi(copysign(x, y), p) -> powi(x, p)
  1372. if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
  1373. match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
  1374. match(II->getArgOperand(0),
  1375. m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value())))
  1376. return replaceOperand(*II, 0, X);
  1377. }
  1378. }
  1379. break;
  1380. case Intrinsic::cttz:
  1381. case Intrinsic::ctlz:
  1382. if (auto *I = foldCttzCtlz(*II, *this))
  1383. return I;
  1384. break;
  1385. case Intrinsic::ctpop:
  1386. if (auto *I = foldCtpop(*II, *this))
  1387. return I;
  1388. break;
  1389. case Intrinsic::fshl:
  1390. case Intrinsic::fshr: {
  1391. Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
  1392. Type *Ty = II->getType();
  1393. unsigned BitWidth = Ty->getScalarSizeInBits();
  1394. Constant *ShAmtC;
  1395. if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
  1396. // Canonicalize a shift amount constant operand to modulo the bit-width.
  1397. Constant *WidthC = ConstantInt::get(Ty, BitWidth);
  1398. Constant *ModuloC =
  1399. ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
  1400. if (!ModuloC)
  1401. return nullptr;
  1402. if (ModuloC != ShAmtC)
  1403. return replaceOperand(*II, 2, ModuloC);
  1404. assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
  1405. ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
  1406. "Shift amount expected to be modulo bitwidth");
  1407. // Canonicalize funnel shift right by constant to funnel shift left. This
  1408. // is not entirely arbitrary. For historical reasons, the backend may
  1409. // recognize rotate left patterns but miss rotate right patterns.
  1410. if (IID == Intrinsic::fshr) {
  1411. // fshr X, Y, C --> fshl X, Y, (BitWidth - C)
  1412. Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
  1413. Module *Mod = II->getModule();
  1414. Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
  1415. return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
  1416. }
  1417. assert(IID == Intrinsic::fshl &&
  1418. "All funnel shifts by simple constants should go left");
  1419. // fshl(X, 0, C) --> shl X, C
  1420. // fshl(X, undef, C) --> shl X, C
  1421. if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
  1422. return BinaryOperator::CreateShl(Op0, ShAmtC);
  1423. // fshl(0, X, C) --> lshr X, (BW-C)
  1424. // fshl(undef, X, C) --> lshr X, (BW-C)
  1425. if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
  1426. return BinaryOperator::CreateLShr(Op1,
  1427. ConstantExpr::getSub(WidthC, ShAmtC));
  1428. // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
  1429. if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
  1430. Module *Mod = II->getModule();
  1431. Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
  1432. return CallInst::Create(Bswap, { Op0 });
  1433. }
  1434. }
  1435. // Left or right might be masked.
  1436. if (SimplifyDemandedInstructionBits(*II))
  1437. return &CI;
  1438. // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
  1439. // so only the low bits of the shift amount are demanded if the bitwidth is
  1440. // a power-of-2.
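    // For example (illustrative): for an i32 funnel shift, only the low 5 bits
    // of the shift amount matter, so the higher bits of operand 2 can be
    // treated as don't-care.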
  1441. if (!isPowerOf2_32(BitWidth))
  1442. break;
  1443. APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
  1444. KnownBits Op2Known(BitWidth);
  1445. if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
  1446. return &CI;
  1447. break;
  1448. }
  1449. case Intrinsic::uadd_with_overflow:
  1450. case Intrinsic::sadd_with_overflow: {
  1451. if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
  1452. return I;
  1453. // Given 2 constant operands whose sum does not overflow:
  1454. // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
  1455. // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
  1456. Value *X;
  1457. const APInt *C0, *C1;
  1458. Value *Arg0 = II->getArgOperand(0);
  1459. Value *Arg1 = II->getArgOperand(1);
  1460. bool IsSigned = IID == Intrinsic::sadd_with_overflow;
  1461. bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0)))
  1462. : match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0)));
  1463. if (HasNWAdd && match(Arg1, m_APInt(C1))) {
  1464. bool Overflow;
  1465. APInt NewC =
  1466. IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
  1467. if (!Overflow)
  1468. return replaceInstUsesWith(
  1469. *II, Builder.CreateBinaryIntrinsic(
  1470. IID, X, ConstantInt::get(Arg1->getType(), NewC)));
  1471. }
  1472. break;
  1473. }
  1474. case Intrinsic::umul_with_overflow:
  1475. case Intrinsic::smul_with_overflow:
  1476. case Intrinsic::usub_with_overflow:
  1477. if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
  1478. return I;
  1479. break;
  1480. case Intrinsic::ssub_with_overflow: {
  1481. if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
  1482. return I;
  1483. Constant *C;
  1484. Value *Arg0 = II->getArgOperand(0);
  1485. Value *Arg1 = II->getArgOperand(1);
  1486. // Given a constant C that is not the minimum signed value
  1487. // for an integer of a given bit width:
  1488. //
  1489. // ssubo X, C -> saddo X, -C
  1490. if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
  1491. Value *NegVal = ConstantExpr::getNeg(C);
  1492. // Build a saddo call that is equivalent to the discovered
  1493. // ssubo call.
  1494. return replaceInstUsesWith(
  1495. *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
  1496. Arg0, NegVal));
  1497. }
  1498. break;
  1499. }
  1500. case Intrinsic::uadd_sat:
  1501. case Intrinsic::sadd_sat:
  1502. case Intrinsic::usub_sat:
  1503. case Intrinsic::ssub_sat: {
  1504. SaturatingInst *SI = cast<SaturatingInst>(II);
  1505. Type *Ty = SI->getType();
  1506. Value *Arg0 = SI->getLHS();
  1507. Value *Arg1 = SI->getRHS();
  1508. // Make use of known overflow information.
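    // For example (illustrative): if the operands are known never to overflow,
    // uadd.sat(X, Y) is just 'add nuw X, Y'; if they always overflow high, the
    // result is the saturation constant (all-ones for uadd.sat).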
  1509. OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
  1510. Arg0, Arg1, SI);
  1511. switch (OR) {
  1512. case OverflowResult::MayOverflow:
  1513. break;
  1514. case OverflowResult::NeverOverflows:
  1515. if (SI->isSigned())
  1516. return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
  1517. else
  1518. return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
  1519. case OverflowResult::AlwaysOverflowsLow: {
  1520. unsigned BitWidth = Ty->getScalarSizeInBits();
  1521. APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
  1522. return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
  1523. }
  1524. case OverflowResult::AlwaysOverflowsHigh: {
  1525. unsigned BitWidth = Ty->getScalarSizeInBits();
  1526. APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
  1527. return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
  1528. }
  1529. }
  1530. // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
  1531. Constant *C;
  1532. if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
  1533. C->isNotMinSignedValue()) {
  1534. Value *NegVal = ConstantExpr::getNeg(C);
  1535. return replaceInstUsesWith(
  1536. *II, Builder.CreateBinaryIntrinsic(
  1537. Intrinsic::sadd_sat, Arg0, NegVal));
  1538. }
  1539. // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
  1540. // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
  1541. // if Val and Val2 have the same sign
  1542. if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
  1543. Value *X;
  1544. const APInt *Val, *Val2;
  1545. APInt NewVal;
  1546. bool IsUnsigned =
  1547. IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
  1548. if (Other->getIntrinsicID() == IID &&
  1549. match(Arg1, m_APInt(Val)) &&
  1550. match(Other->getArgOperand(0), m_Value(X)) &&
  1551. match(Other->getArgOperand(1), m_APInt(Val2))) {
  1552. if (IsUnsigned)
  1553. NewVal = Val->uadd_sat(*Val2);
  1554. else if (Val->isNonNegative() == Val2->isNonNegative()) {
  1555. bool Overflow;
  1556. NewVal = Val->sadd_ov(*Val2, Overflow);
  1557. if (Overflow) {
  1558. // Both adds together may add more than SignedMaxValue
  1559. // without saturating the final result.
  1560. break;
  1561. }
  1562. } else {
  1563. // Cannot fold saturated addition with different signs.
  1564. break;
  1565. }
  1566. return replaceInstUsesWith(
  1567. *II, Builder.CreateBinaryIntrinsic(
  1568. IID, X, ConstantInt::get(II->getType(), NewVal)));
  1569. }
  1570. }
  1571. break;
  1572. }
  1573. case Intrinsic::minnum:
  1574. case Intrinsic::maxnum:
  1575. case Intrinsic::minimum:
  1576. case Intrinsic::maximum: {
  1577. Value *Arg0 = II->getArgOperand(0);
  1578. Value *Arg1 = II->getArgOperand(1);
  1579. Value *X, *Y;
  1580. if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
  1581. (Arg0->hasOneUse() || Arg1->hasOneUse())) {
  1582. // If both operands are negated, invert the call and negate the result:
  1583. // min(-X, -Y) --> -(max(X, Y))
  1584. // max(-X, -Y) --> -(min(X, Y))
  1585. Intrinsic::ID NewIID;
  1586. switch (IID) {
  1587. case Intrinsic::maxnum:
  1588. NewIID = Intrinsic::minnum;
  1589. break;
  1590. case Intrinsic::minnum:
  1591. NewIID = Intrinsic::maxnum;
  1592. break;
  1593. case Intrinsic::maximum:
  1594. NewIID = Intrinsic::minimum;
  1595. break;
  1596. case Intrinsic::minimum:
  1597. NewIID = Intrinsic::maximum;
  1598. break;
  1599. default:
  1600. llvm_unreachable("unexpected intrinsic ID");
  1601. }
  1602. Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
  1603. Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
  1604. FNeg->copyIRFlags(II);
  1605. return FNeg;
  1606. }
  1607. // m(m(X, C2), C1) -> m(X, C)
  1608. const APFloat *C1, *C2;
  1609. if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
  1610. if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
  1611. ((match(M->getArgOperand(0), m_Value(X)) &&
  1612. match(M->getArgOperand(1), m_APFloat(C2))) ||
  1613. (match(M->getArgOperand(1), m_Value(X)) &&
  1614. match(M->getArgOperand(0), m_APFloat(C2))))) {
  1615. APFloat Res(0.0);
  1616. switch (IID) {
  1617. case Intrinsic::maxnum:
  1618. Res = maxnum(*C1, *C2);
  1619. break;
  1620. case Intrinsic::minnum:
  1621. Res = minnum(*C1, *C2);
  1622. break;
  1623. case Intrinsic::maximum:
  1624. Res = maximum(*C1, *C2);
  1625. break;
  1626. case Intrinsic::minimum:
  1627. Res = minimum(*C1, *C2);
  1628. break;
  1629. default:
  1630. llvm_unreachable("unexpected intrinsic ID");
  1631. }
  1632. Instruction *NewCall = Builder.CreateBinaryIntrinsic(
  1633. IID, X, ConstantFP::get(Arg0->getType(), Res), II);
  1634. // TODO: Conservatively intersecting FMF. If Res == C2, the transform
  1635. // was a simplification (so Arg0 and its original flags could
  1636. // propagate?)
  1637. NewCall->andIRFlags(M);
  1638. return replaceInstUsesWith(*II, NewCall);
  1639. }
  1640. }
  1641. // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
  1642. if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
  1643. match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
  1644. X->getType() == Y->getType()) {
  1645. Value *NewCall =
  1646. Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
  1647. return new FPExtInst(NewCall, II->getType());
  1648. }
  1649. // max X, -X --> fabs X
  1650. // min X, -X --> -(fabs X)
  1651. // TODO: Remove one-use limitation? That is obviously better for max.
  1652. // It would be an extra instruction for min (fnabs), but that is
  1653. // still likely better for analysis and codegen.
  1654. if ((match(Arg0, m_OneUse(m_FNeg(m_Value(X)))) && Arg1 == X) ||
  1655. (match(Arg1, m_OneUse(m_FNeg(m_Value(X)))) && Arg0 == X)) {
  1656. Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
  1657. if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
  1658. R = Builder.CreateFNegFMF(R, II);
  1659. return replaceInstUsesWith(*II, R);
  1660. }
  1661. break;
  1662. }
  1663. case Intrinsic::matrix_multiply: {
  1664. // Optimize negation in matrix multiplication.
  1665. // -A * -B -> A * B
  1666. Value *A, *B;
  1667. if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
  1668. match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
  1669. replaceOperand(*II, 0, A);
  1670. replaceOperand(*II, 1, B);
  1671. return II;
  1672. }
  1673. Value *Op0 = II->getOperand(0);
  1674. Value *Op1 = II->getOperand(1);
  1675. Value *OpNotNeg, *NegatedOp;
  1676. unsigned NegatedOpArg, OtherOpArg;
  1677. if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
  1678. NegatedOp = Op0;
  1679. NegatedOpArg = 0;
  1680. OtherOpArg = 1;
  1681. } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
  1682. NegatedOp = Op1;
  1683. NegatedOpArg = 1;
  1684. OtherOpArg = 0;
  1685. } else
  1686. // Multiplication doesn't have a negated operand.
  1687. break;
  1688. // Only optimize if the negated operand has only one use.
  1689. if (!NegatedOp->hasOneUse())
  1690. break;
  1691. Value *OtherOp = II->getOperand(OtherOpArg);
  1692. VectorType *RetTy = cast<VectorType>(II->getType());
  1693. VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
  1694. VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
  1695. ElementCount NegatedCount = NegatedOpTy->getElementCount();
  1696. ElementCount OtherCount = OtherOpTy->getElementCount();
  1697. ElementCount RetCount = RetTy->getElementCount();
  1698. // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
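    // Here "cheaper" means the other operand has fewer elements than both the
    // negated operand and the result, so moving the negation onto it touches
    // fewer lanes.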
  1699. if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
  1700. ElementCount::isKnownLT(OtherCount, RetCount)) {
  1701. Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
  1702. replaceOperand(*II, NegatedOpArg, OpNotNeg);
  1703. replaceOperand(*II, OtherOpArg, InverseOtherOp);
  1704. return II;
  1705. }
  1706. // (-A) * B -> -(A * B), if it is cheaper to negate the result
  1707. if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
  1708. SmallVector<Value *, 5> NewArgs(II->args());
  1709. NewArgs[NegatedOpArg] = OpNotNeg;
  1710. Instruction *NewMul =
  1711. Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
  1712. return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
  1713. }
  1714. break;
  1715. }
  1716. case Intrinsic::fmuladd: {
  1717. // Canonicalize fast fmuladd to the separate fmul + fadd.
  1718. if (II->isFast()) {
  1719. BuilderTy::FastMathFlagGuard Guard(Builder);
  1720. Builder.setFastMathFlags(II->getFastMathFlags());
  1721. Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
  1722. II->getArgOperand(1));
  1723. Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
  1724. Add->takeName(II);
  1725. return replaceInstUsesWith(*II, Add);
  1726. }
  1727. // Try to simplify the underlying FMul.
  1728. if (Value *V = simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
  1729. II->getFastMathFlags(),
  1730. SQ.getWithInstruction(II))) {
  1731. auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
  1732. FAdd->copyFastMathFlags(II);
  1733. return FAdd;
  1734. }
  1735. [[fallthrough]];
  1736. }
  1737. case Intrinsic::fma: {
  1738. // fma fneg(x), fneg(y), z -> fma x, y, z
  1739. Value *Src0 = II->getArgOperand(0);
  1740. Value *Src1 = II->getArgOperand(1);
  1741. Value *X, *Y;
  1742. if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
  1743. replaceOperand(*II, 0, X);
  1744. replaceOperand(*II, 1, Y);
  1745. return II;
  1746. }
  1747. // fma fabs(x), fabs(x), z -> fma x, x, z
  1748. if (match(Src0, m_FAbs(m_Value(X))) &&
  1749. match(Src1, m_FAbs(m_Specific(X)))) {
  1750. replaceOperand(*II, 0, X);
  1751. replaceOperand(*II, 1, X);
  1752. return II;
  1753. }
  1754. // Try to simplify the underlying FMul. We can only apply simplifications
  1755. // that do not require rounding.
  1756. if (Value *V = simplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
  1757. II->getFastMathFlags(),
  1758. SQ.getWithInstruction(II))) {
  1759. auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
  1760. FAdd->copyFastMathFlags(II);
  1761. return FAdd;
  1762. }
  1763. // fma x, y, 0 -> fmul x, y
  1764. // This is always valid for -0.0, but requires nsz for +0.0 as
  1765. // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
  1766. if (match(II->getArgOperand(2), m_NegZeroFP()) ||
  1767. (match(II->getArgOperand(2), m_PosZeroFP()) &&
  1768. II->getFastMathFlags().noSignedZeros()))
  1769. return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
  1770. break;
  1771. }
  1772. case Intrinsic::copysign: {
  1773. Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
  1774. if (SignBitMustBeZero(Sign, &TLI)) {
  1775. // If we know that the sign argument is positive, reduce to FABS:
  1776. // copysign Mag, +Sign --> fabs Mag
  1777. Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
  1778. return replaceInstUsesWith(*II, Fabs);
  1779. }
  1780. // TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
  1781. const APFloat *C;
  1782. if (match(Sign, m_APFloat(C)) && C->isNegative()) {
  1783. // If we know that the sign argument is negative, reduce to FNABS:
  1784. // copysign Mag, -Sign --> fneg (fabs Mag)
  1785. Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
  1786. return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
  1787. }
  1788. // Propagate sign argument through nested calls:
  1789. // copysign Mag, (copysign ?, X) --> copysign Mag, X
  1790. Value *X;
  1791. if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
  1792. return replaceOperand(*II, 1, X);
  1793. // Peek through changes of magnitude's sign-bit. This call rewrites those:
  1794. // copysign (fabs X), Sign --> copysign X, Sign
  1795. // copysign (fneg X), Sign --> copysign X, Sign
  1796. if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
  1797. return replaceOperand(*II, 0, X);
  1798. break;
  1799. }
  1800. case Intrinsic::fabs: {
  1801. Value *Cond, *TVal, *FVal;
  1802. if (match(II->getArgOperand(0),
  1803. m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
  1804. // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
  1805. if (isa<Constant>(TVal) && isa<Constant>(FVal)) {
  1806. CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
  1807. CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
  1808. return SelectInst::Create(Cond, AbsT, AbsF);
  1809. }
  1810. // fabs (select Cond, -FVal, FVal) --> fabs FVal
  1811. if (match(TVal, m_FNeg(m_Specific(FVal))))
  1812. return replaceOperand(*II, 0, FVal);
  1813. // fabs (select Cond, TVal, -TVal) --> fabs TVal
  1814. if (match(FVal, m_FNeg(m_Specific(TVal))))
  1815. return replaceOperand(*II, 0, TVal);
  1816. }
  1817. Value *Magnitude, *Sign;
  1818. if (match(II->getArgOperand(0),
  1819. m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
  1820. // fabs (copysign x, y) -> (fabs x)
  1821. CallInst *AbsSign =
  1822. Builder.CreateCall(II->getCalledFunction(), {Magnitude});
  1823. AbsSign->copyFastMathFlags(II);
  1824. return replaceInstUsesWith(*II, AbsSign);
  1825. }
  1826. [[fallthrough]];
  1827. }
  1828. case Intrinsic::ceil:
  1829. case Intrinsic::floor:
  1830. case Intrinsic::round:
  1831. case Intrinsic::roundeven:
  1832. case Intrinsic::nearbyint:
  1833. case Intrinsic::rint:
  1834. case Intrinsic::trunc: {
  1835. Value *ExtSrc;
  1836. if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
  1837. // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
  1838. Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
  1839. return new FPExtInst(NarrowII, II->getType());
  1840. }
  1841. break;
  1842. }
  1843. case Intrinsic::cos:
  1844. case Intrinsic::amdgcn_cos: {
  1845. Value *X;
  1846. Value *Src = II->getArgOperand(0);
  1847. if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
  1848. // cos(-x) -> cos(x)
  1849. // cos(fabs(x)) -> cos(x)
  1850. return replaceOperand(*II, 0, X);
  1851. }
  1852. break;
  1853. }
  1854. case Intrinsic::sin: {
  1855. Value *X;
  1856. if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
  1857. // sin(-x) --> -sin(x)
  1858. Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
  1859. Instruction *FNeg = UnaryOperator::CreateFNeg(NewSin);
  1860. FNeg->copyFastMathFlags(II);
  1861. return FNeg;
  1862. }
  1863. break;
  1864. }
  1865. case Intrinsic::ptrauth_auth:
  1866. case Intrinsic::ptrauth_resign: {
  1867. // (sign|resign) + (auth|resign) can be folded by omitting the middle
  1868. // sign+auth component if the key and discriminator match.
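    // For example (illustrative): auth(sign(P, K, D), K, D) --> P, and
    // resign(sign(P, K0, D0), K0, D0, K1, D1) --> sign(P, K1, D1).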
  1869. bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
  1870. Value *Key = II->getArgOperand(1);
  1871. Value *Disc = II->getArgOperand(2);
  1872. // AuthKey will be the key we need to end up authenticating against in
  1873. // whatever we replace this sequence with.
  1874. Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
  1875. if (auto CI = dyn_cast<CallBase>(II->getArgOperand(0))) {
  1876. BasePtr = CI->getArgOperand(0);
  1877. if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
  1878. if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
  1879. break;
  1880. } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
  1881. if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
  1882. break;
  1883. AuthKey = CI->getArgOperand(1);
  1884. AuthDisc = CI->getArgOperand(2);
  1885. } else
  1886. break;
  1887. } else
  1888. break;
  1889. unsigned NewIntrin;
  1890. if (AuthKey && NeedSign) {
  1891. // resign(0,1) + resign(1,2) = resign(0, 2)
  1892. NewIntrin = Intrinsic::ptrauth_resign;
  1893. } else if (AuthKey) {
  1894. // resign(0,1) + auth(1) = auth(0)
  1895. NewIntrin = Intrinsic::ptrauth_auth;
  1896. } else if (NeedSign) {
  1897. // sign(0) + resign(0, 1) = sign(1)
  1898. NewIntrin = Intrinsic::ptrauth_sign;
  1899. } else {
  1900. // sign(0) + auth(0) = nop
  1901. replaceInstUsesWith(*II, BasePtr);
  1902. eraseInstFromFunction(*II);
  1903. return nullptr;
  1904. }
  1905. SmallVector<Value *, 4> CallArgs;
  1906. CallArgs.push_back(BasePtr);
  1907. if (AuthKey) {
  1908. CallArgs.push_back(AuthKey);
  1909. CallArgs.push_back(AuthDisc);
  1910. }
  1911. if (NeedSign) {
  1912. CallArgs.push_back(II->getArgOperand(3));
  1913. CallArgs.push_back(II->getArgOperand(4));
  1914. }
  1915. Function *NewFn = Intrinsic::getDeclaration(II->getModule(), NewIntrin);
  1916. return CallInst::Create(NewFn, CallArgs);
  1917. }
  1918. case Intrinsic::arm_neon_vtbl1:
  1919. case Intrinsic::aarch64_neon_tbl1:
  1920. if (Value *V = simplifyNeonTbl1(*II, Builder))
  1921. return replaceInstUsesWith(*II, V);
  1922. break;
  1923. case Intrinsic::arm_neon_vmulls:
  1924. case Intrinsic::arm_neon_vmullu:
  1925. case Intrinsic::aarch64_neon_smull:
  1926. case Intrinsic::aarch64_neon_umull: {
  1927. Value *Arg0 = II->getArgOperand(0);
  1928. Value *Arg1 = II->getArgOperand(1);
  1929. // Handle mul by zero first:
  1930. if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
  1931. return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
  1932. }
  1933. // Check for constant LHS & RHS - in this case we just simplify.
  1934. bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
  1935. IID == Intrinsic::aarch64_neon_umull);
  1936. VectorType *NewVT = cast<VectorType>(II->getType());
  1937. if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
  1938. if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
  1939. CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
  1940. CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
  1941. return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
  1942. }
  1943. // Couldn't simplify - canonicalize constant to the RHS.
  1944. std::swap(Arg0, Arg1);
  1945. }
  1946. // Handle mul by one:
  1947. if (Constant *CV1 = dyn_cast<Constant>(Arg1))
  1948. if (ConstantInt *Splat =
  1949. dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
  1950. if (Splat->isOne())
  1951. return CastInst::CreateIntegerCast(Arg0, II->getType(),
  1952. /*isSigned=*/!Zext);
  1953. break;
  1954. }
  1955. case Intrinsic::arm_neon_aesd:
  1956. case Intrinsic::arm_neon_aese:
  1957. case Intrinsic::aarch64_crypto_aesd:
  1958. case Intrinsic::aarch64_crypto_aese: {
  1959. Value *DataArg = II->getArgOperand(0);
  1960. Value *KeyArg = II->getArgOperand(1);
  1961. // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
  1962. Value *Data, *Key;
  1963. if (match(KeyArg, m_ZeroInt()) &&
  1964. match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
  1965. replaceOperand(*II, 0, Data);
  1966. replaceOperand(*II, 1, Key);
  1967. return II;
  1968. }
  1969. break;
  1970. }
  1971. case Intrinsic::hexagon_V6_vandvrt:
  1972. case Intrinsic::hexagon_V6_vandvrt_128B: {
  1973. // Simplify Q -> V -> Q conversion.
  1974. if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
  1975. Intrinsic::ID ID0 = Op0->getIntrinsicID();
  1976. if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
  1977. ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
  1978. break;
  1979. Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
  1980. uint64_t Bytes1 = computeKnownBits(Bytes, 0, Op0).One.getZExtValue();
  1981. uint64_t Mask1 = computeKnownBits(Mask, 0, II).One.getZExtValue();
  1982. // Check if every byte has common bits in Bytes and Mask.
  1983. uint64_t C = Bytes1 & Mask1;
  1984. if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
  1985. return replaceInstUsesWith(*II, Op0->getArgOperand(0));
  1986. }
  1987. break;
  1988. }
  1989. case Intrinsic::stackrestore: {
  1990. enum class ClassifyResult {
  1991. None,
  1992. Alloca,
  1993. StackRestore,
  1994. CallWithSideEffects,
  1995. };
  1996. auto Classify = [](const Instruction *I) {
  1997. if (isa<AllocaInst>(I))
  1998. return ClassifyResult::Alloca;
  1999. if (auto *CI = dyn_cast<CallInst>(I)) {
  2000. if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
  2001. if (II->getIntrinsicID() == Intrinsic::stackrestore)
  2002. return ClassifyResult::StackRestore;
  2003. if (II->mayHaveSideEffects())
  2004. return ClassifyResult::CallWithSideEffects;
  2005. } else {
  2006. // Consider all non-intrinsic calls to be side effects
  2007. return ClassifyResult::CallWithSideEffects;
  2008. }
  2009. }
  2010. return ClassifyResult::None;
  2011. };
  2012. // If the stacksave and the stackrestore are in the same BB, and there is
  2013. // no intervening call, alloca, or stackrestore of a different stacksave,
  2014. // remove the restore. This can happen when variable allocas are DCE'd.
  2015. if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
  2016. if (SS->getIntrinsicID() == Intrinsic::stacksave &&
  2017. SS->getParent() == II->getParent()) {
  2018. BasicBlock::iterator BI(SS);
  2019. bool CannotRemove = false;
  2020. for (++BI; &*BI != II; ++BI) {
  2021. switch (Classify(&*BI)) {
  2022. case ClassifyResult::None:
  2023. // So far so good, look at next instructions.
  2024. break;
  2025. case ClassifyResult::StackRestore:
  2026. // If we found an intervening stackrestore for a different
  2027. // stacksave, we can't remove the stackrestore. Otherwise, continue.
  2028. if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
  2029. CannotRemove = true;
  2030. break;
  2031. case ClassifyResult::Alloca:
  2032. case ClassifyResult::CallWithSideEffects:
  2033. // If we found an alloca, a non-intrinsic call, or an intrinsic
  2034. // call with side effects, we can't remove the stackrestore.
  2035. CannotRemove = true;
  2036. break;
  2037. }
  2038. if (CannotRemove)
  2039. break;
  2040. }
  2041. if (!CannotRemove)
  2042. return eraseInstFromFunction(CI);
  2043. }
  2044. }
  2045. // Scan down this block to see if there is another stack restore in the
  2046. // same block without an intervening call/alloca.
  2047. BasicBlock::iterator BI(II);
  2048. Instruction *TI = II->getParent()->getTerminator();
  2049. bool CannotRemove = false;
  2050. for (++BI; &*BI != TI; ++BI) {
  2051. switch (Classify(&*BI)) {
  2052. case ClassifyResult::None:
  2053. // So far so good, look at next instructions.
  2054. break;
  2055. case ClassifyResult::StackRestore:
  2056. // If there is a stackrestore below this one, remove this one.
  2057. return eraseInstFromFunction(CI);
  2058. case ClassifyResult::Alloca:
  2059. case ClassifyResult::CallWithSideEffects:
  2060. // If we found an alloca, a non-intrinsic call, or an intrinsic call
  2061. // with side effects (such as llvm.stacksave and llvm.read_register),
  2062. // we can't remove the stack restore.
  2063. CannotRemove = true;
  2064. break;
  2065. }
  2066. if (CannotRemove)
  2067. break;
  2068. }
  2069. // If the stack restore is in a return, resume, or unwind block and if there
  2070. // are no allocas or calls between the restore and the return, nuke the
  2071. // restore.
  2072. if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
  2073. return eraseInstFromFunction(CI);
  2074. break;
  2075. }
  2076. case Intrinsic::lifetime_end:
    // Asan needs to poison memory to detect invalid accesses, which are
    // possible even for an empty lifetime range.
  2079. if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
  2080. II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
  2081. II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
  2082. break;
  2083. if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
  2084. return I.getIntrinsicID() == Intrinsic::lifetime_start;
  2085. }))
  2086. return nullptr;
  2087. break;
  2088. case Intrinsic::assume: {
  2089. Value *IIOperand = II->getArgOperand(0);
  2090. SmallVector<OperandBundleDef, 4> OpBundles;
  2091. II->getOperandBundlesAsDefs(OpBundles);
    /// This will remove the boolean Condition from the assume given as an
    /// argument and remove the assume if it becomes useless.
    /// It always returns nullptr so it can be used as a return value.
  2095. auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
  2096. assert(isa<AssumeInst>(Assume));
  2097. if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II)))
  2098. return eraseInstFromFunction(CI);
  2099. replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
  2100. return nullptr;
  2101. };
  2102. // Remove an assume if it is followed by an identical assume.
  2103. // TODO: Do we need this? Unless there are conflicting assumptions, the
  2104. // computeKnownBits(IIOperand) below here eliminates redundant assumes.
  2105. Instruction *Next = II->getNextNonDebugInstruction();
  2106. if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
  2107. return RemoveConditionFromAssume(Next);
  2108. // Canonicalize assume(a && b) -> assume(a); assume(b);
  2109. // Note: New assumption intrinsics created here are registered by
  2110. // the InstCombineIRInserter object.
  2111. FunctionType *AssumeIntrinsicTy = II->getFunctionType();
  2112. Value *AssumeIntrinsic = II->getCalledOperand();
  2113. Value *A, *B;
  2114. if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
  2115. Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
  2116. II->getName());
  2117. Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
  2118. return eraseInstFromFunction(*II);
  2119. }
  2120. // assume(!(a || b)) -> assume(!a); assume(!b);
  2121. if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
  2122. Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
  2123. Builder.CreateNot(A), OpBundles, II->getName());
  2124. Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
  2125. Builder.CreateNot(B), II->getName());
  2126. return eraseInstFromFunction(*II);
  2127. }
  2128. // assume( (load addr) != null ) -> add 'nonnull' metadata to load
  2129. // (if assume is valid at the load)
  2130. CmpInst::Predicate Pred;
  2131. Instruction *LHS;
  2132. if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
  2133. Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
  2134. LHS->getType()->isPointerTy() &&
  2135. isValidAssumeForContext(II, LHS, &DT)) {
  2136. MDNode *MD = MDNode::get(II->getContext(), std::nullopt);
  2137. LHS->setMetadata(LLVMContext::MD_nonnull, MD);
  2138. return RemoveConditionFromAssume(II);
  2139. // TODO: apply nonnull return attributes to calls and invokes
  2140. // TODO: apply range metadata for range check patterns?
  2141. }
  2142. // Convert nonnull assume like:
  2143. // %A = icmp ne i32* %PTR, null
  2144. // call void @llvm.assume(i1 %A)
  2145. // into
  2146. // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
  2147. if (EnableKnowledgeRetention &&
  2148. match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) &&
  2149. Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
  2150. if (auto *Replacement = buildAssumeFromKnowledge(
  2151. {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
  2152. Replacement->insertBefore(Next);
  2153. AC.registerAssumption(Replacement);
  2154. return RemoveConditionFromAssume(II);
  2155. }
  2156. }
  2157. // Convert alignment assume like:
  2158. // %B = ptrtoint i32* %A to i64
  2159. // %C = and i64 %B, Constant
  2160. // %D = icmp eq i64 %C, 0
  2161. // call void @llvm.assume(i1 %D)
  2162. // into
  2163. // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
  2164. uint64_t AlignMask;
  2165. if (EnableKnowledgeRetention &&
  2166. match(IIOperand,
  2167. m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)),
  2168. m_Zero())) &&
  2169. Pred == CmpInst::ICMP_EQ) {
  2170. if (isPowerOf2_64(AlignMask + 1)) {
  2171. uint64_t Offset = 0;
  2172. match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
  2173. if (match(A, m_PtrToInt(m_Value(A)))) {
  2174. /// Note: this doesn't preserve the offset information but merges
  2175. /// offset and alignment.
  2176. /// TODO: we can generate a GEP instead of merging the alignment with
  2177. /// the offset.
  2178. RetainedKnowledge RK{Attribute::Alignment,
  2179. (unsigned)MinAlign(Offset, AlignMask + 1), A};
  2180. if (auto *Replacement =
  2181. buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {
  2182. Replacement->insertAfter(II);
  2183. AC.registerAssumption(Replacement);
  2184. }
  2185. return RemoveConditionFromAssume(II);
  2186. }
  2187. }
  2188. }
  2189. /// Canonicalize Knowledge in operand bundles.
  2190. if (EnableKnowledgeRetention && II->hasOperandBundles()) {
  2191. for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
  2192. auto &BOI = II->bundle_op_info_begin()[Idx];
  2193. RetainedKnowledge RK =
  2194. llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
  2195. if (BOI.End - BOI.Begin > 2)
  2196. continue; // Prevent reducing knowledge in an align with offset since
2197. // extracting a RetainedKnowledge from them loses offset
  2198. // information
  2199. RetainedKnowledge CanonRK =
  2200. llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
  2201. &getAssumptionCache(),
  2202. &getDominatorTree());
  2203. if (CanonRK == RK)
  2204. continue;
  2205. if (!CanonRK) {
  2206. if (BOI.End - BOI.Begin > 0) {
  2207. Worklist.pushValue(II->op_begin()[BOI.Begin]);
  2208. Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
  2209. }
  2210. continue;
  2211. }
  2212. assert(RK.AttrKind == CanonRK.AttrKind);
  2213. if (BOI.End - BOI.Begin > 0)
  2214. II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
  2215. if (BOI.End - BOI.Begin > 1)
  2216. II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
  2217. Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
  2218. if (RK.WasOn)
  2219. Worklist.pushValue(RK.WasOn);
  2220. return II;
  2221. }
  2222. }
  2223. // If there is a dominating assume with the same condition as this one,
  2224. // then this one is redundant, and should be removed.
  2225. KnownBits Known(1);
  2226. computeKnownBits(IIOperand, Known, 0, II);
  2227. if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
  2228. return eraseInstFromFunction(*II);
  2229. // Update the cache of affected values for this assumption (we might be
  2230. // here because we just simplified the condition).
  2231. AC.updateAffectedValues(cast<AssumeInst>(II));
  2232. break;
  2233. }
  2234. case Intrinsic::experimental_guard: {
  2235. // Is this guard followed by another guard? We scan forward over a small
  2236. // fixed window of instructions to handle common cases with conditions
  2237. // computed between guards.
  2238. Instruction *NextInst = II->getNextNonDebugInstruction();
  2239. for (unsigned i = 0; i < GuardWideningWindow; i++) {
  2240. // Note: Using context-free form to avoid compile time blow up
  2241. if (!isSafeToSpeculativelyExecute(NextInst))
  2242. break;
  2243. NextInst = NextInst->getNextNonDebugInstruction();
  2244. }
  2245. Value *NextCond = nullptr;
  2246. if (match(NextInst,
  2247. m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
  2248. Value *CurrCond = II->getArgOperand(0);
2249. // Remove a guard that is immediately preceded by an identical guard.
  2250. // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
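// For illustration, a sketch with hypothetical conditions %a and %b:
//   call void (i1, ...) @llvm.experimental.guard(i1 %a) [ "deopt"() ]
//   call void (i1, ...) @llvm.experimental.guard(i1 %b) [ "deopt"() ]
// is canonicalized to
//   %c = and i1 %a, %b
//   call void (i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]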
  2251. if (CurrCond != NextCond) {
  2252. Instruction *MoveI = II->getNextNonDebugInstruction();
  2253. while (MoveI != NextInst) {
  2254. auto *Temp = MoveI;
  2255. MoveI = MoveI->getNextNonDebugInstruction();
  2256. Temp->moveBefore(II);
  2257. }
  2258. replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
  2259. }
  2260. eraseInstFromFunction(*NextInst);
  2261. return II;
  2262. }
  2263. break;
  2264. }
  2265. case Intrinsic::vector_insert: {
  2266. Value *Vec = II->getArgOperand(0);
  2267. Value *SubVec = II->getArgOperand(1);
  2268. Value *Idx = II->getArgOperand(2);
  2269. auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
  2270. auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
  2271. auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
  2272. // Only canonicalize if the destination vector, Vec, and SubVec are all
  2273. // fixed vectors.
  2274. if (DstTy && VecTy && SubVecTy) {
  2275. unsigned DstNumElts = DstTy->getNumElements();
  2276. unsigned VecNumElts = VecTy->getNumElements();
  2277. unsigned SubVecNumElts = SubVecTy->getNumElements();
  2278. unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
  2279. // An insert that entirely overwrites Vec with SubVec is a nop.
  2280. if (VecNumElts == SubVecNumElts)
  2281. return replaceInstUsesWith(CI, SubVec);
  2282. // Widen SubVec into a vector of the same width as Vec, since
  2283. // shufflevector requires the two input vectors to be the same width.
  2284. // Elements beyond the bounds of SubVec within the widened vector are
  2285. // undefined.
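// For illustration, a sketch assuming <4 x i32> %Vec, <2 x i32> %SubVec and
// Idx = 2 (the widened lanes beyond SubVec are undefined):
//   %widen = shufflevector <2 x i32> %SubVec, <2 x i32> poison,
//                          <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
//   %res   = shufflevector <4 x i32> %Vec, <4 x i32> %widen,
//                          <4 x i32> <i32 0, i32 1, i32 4, i32 5>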
  2286. SmallVector<int, 8> WidenMask;
  2287. unsigned i;
  2288. for (i = 0; i != SubVecNumElts; ++i)
  2289. WidenMask.push_back(i);
  2290. for (; i != VecNumElts; ++i)
  2291. WidenMask.push_back(UndefMaskElem);
  2292. Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
  2293. SmallVector<int, 8> Mask;
  2294. for (unsigned i = 0; i != IdxN; ++i)
  2295. Mask.push_back(i);
  2296. for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
  2297. Mask.push_back(i);
  2298. for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
  2299. Mask.push_back(i);
  2300. Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
  2301. return replaceInstUsesWith(CI, Shuffle);
  2302. }
  2303. break;
  2304. }
  2305. case Intrinsic::vector_extract: {
  2306. Value *Vec = II->getArgOperand(0);
  2307. Value *Idx = II->getArgOperand(1);
  2308. Type *ReturnType = II->getType();
  2309. // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
  2310. // ExtractIdx)
  2311. unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
  2312. Value *InsertTuple, *InsertIdx, *InsertValue;
  2313. if (match(Vec, m_Intrinsic<Intrinsic::vector_insert>(m_Value(InsertTuple),
  2314. m_Value(InsertValue),
  2315. m_Value(InsertIdx))) &&
  2316. InsertValue->getType() == ReturnType) {
  2317. unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
  2318. // Case where we get the same index right after setting it.
  2319. // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
  2320. // InsertValue
  2321. if (ExtractIdx == Index)
  2322. return replaceInstUsesWith(CI, InsertValue);
2323. // If we are getting a different index than what was set in the
2324. // insert.vector intrinsic, we can just set the input tuple to the one up
2325. // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
  2326. // InsertIndex), ExtractIndex)
  2327. // --> extract.vector(InsertTuple, ExtractIndex)
  2328. else
  2329. return replaceOperand(CI, 0, InsertTuple);
  2330. }
  2331. auto *DstTy = dyn_cast<FixedVectorType>(ReturnType);
  2332. auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
2333. // Only canonicalize if the destination vector and Vec are fixed
2334. // vectors.
  2335. if (DstTy && VecTy) {
  2336. unsigned DstNumElts = DstTy->getNumElements();
  2337. unsigned VecNumElts = VecTy->getNumElements();
  2338. unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
  2339. // Extracting the entirety of Vec is a nop.
  2340. if (VecNumElts == DstNumElts) {
  2341. replaceInstUsesWith(CI, Vec);
  2342. return eraseInstFromFunction(CI);
  2343. }
  2344. SmallVector<int, 8> Mask;
  2345. for (unsigned i = 0; i != DstNumElts; ++i)
  2346. Mask.push_back(IdxN + i);
  2347. Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
  2348. return replaceInstUsesWith(CI, Shuffle);
  2349. }
  2350. break;
  2351. }
  2352. case Intrinsic::experimental_vector_reverse: {
  2353. Value *BO0, *BO1, *X, *Y;
  2354. Value *Vec = II->getArgOperand(0);
  2355. if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
  2356. auto *OldBinOp = cast<BinaryOperator>(Vec);
  2357. if (match(BO0, m_VecReverse(m_Value(X)))) {
  2358. // rev(binop rev(X), rev(Y)) --> binop X, Y
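// For illustration, a sketch with hypothetical <vscale x 4 x i32> values:
//   %rx  = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %x)
//   %ry  = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %y)
//   %add = add <vscale x 4 x i32> %rx, %ry
//   %rev = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %add)
// simplifies to
//   %rev = add <vscale x 4 x i32> %x, %y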
  2359. if (match(BO1, m_VecReverse(m_Value(Y))))
  2360. return replaceInstUsesWith(CI,
  2361. BinaryOperator::CreateWithCopiedFlags(
  2362. OldBinOp->getOpcode(), X, Y, OldBinOp,
  2363. OldBinOp->getName(), II));
  2364. // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
  2365. if (isSplatValue(BO1))
  2366. return replaceInstUsesWith(CI,
  2367. BinaryOperator::CreateWithCopiedFlags(
  2368. OldBinOp->getOpcode(), X, BO1,
  2369. OldBinOp, OldBinOp->getName(), II));
  2370. }
  2371. // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
  2372. if (match(BO1, m_VecReverse(m_Value(Y))) && isSplatValue(BO0))
  2373. return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
  2374. OldBinOp->getOpcode(), BO0, Y,
  2375. OldBinOp, OldBinOp->getName(), II));
  2376. }
  2377. // rev(unop rev(X)) --> unop X
  2378. if (match(Vec, m_OneUse(m_UnOp(m_VecReverse(m_Value(X)))))) {
  2379. auto *OldUnOp = cast<UnaryOperator>(Vec);
  2380. auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
  2381. OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(), II);
  2382. return replaceInstUsesWith(CI, NewUnOp);
  2383. }
  2384. break;
  2385. }
  2386. case Intrinsic::vector_reduce_or:
  2387. case Intrinsic::vector_reduce_and: {
  2388. // Canonicalize logical or/and reductions:
  2389. // Or reduction for i1 is represented as:
  2390. // %val = bitcast <ReduxWidth x i1> to iReduxWidth
  2391. // %res = cmp ne iReduxWidth %val, 0
  2392. // And reduction for i1 is represented as:
  2393. // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2394. // %res = cmp eq iReduxWidth %val, -1 (all ones)
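// For illustration, a sketch for a hypothetical <4 x i1> %v:
//   %r = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %v)
// becomes
//   %val = bitcast <4 x i1> %v to i4
//   %r   = icmp ne i4 %val, 0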
  2395. Value *Arg = II->getArgOperand(0);
  2396. Value *Vect;
  2397. if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
  2398. if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
  2399. if (FTy->getElementType() == Builder.getInt1Ty()) {
  2400. Value *Res = Builder.CreateBitCast(
  2401. Vect, Builder.getIntNTy(FTy->getNumElements()));
  2402. if (IID == Intrinsic::vector_reduce_and) {
  2403. Res = Builder.CreateICmpEQ(
  2404. Res, ConstantInt::getAllOnesValue(Res->getType()));
  2405. } else {
  2406. assert(IID == Intrinsic::vector_reduce_or &&
  2407. "Expected or reduction.");
  2408. Res = Builder.CreateIsNotNull(Res);
  2409. }
  2410. if (Arg != Vect)
  2411. Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
  2412. II->getType());
  2413. return replaceInstUsesWith(CI, Res);
  2414. }
  2415. }
  2416. [[fallthrough]];
  2417. }
  2418. case Intrinsic::vector_reduce_add: {
  2419. if (IID == Intrinsic::vector_reduce_add) {
  2420. // Convert vector_reduce_add(ZExt(<n x i1>)) to
2421. // ZExtOrTrunc(ctpop(bitcast <n x i1> to iN)).
2422. // Convert vector_reduce_add(SExt(<n x i1>)) to
2423. // -ZExtOrTrunc(ctpop(bitcast <n x i1> to iN)).
2424. // Convert vector_reduce_add(<n x i1>) to
2425. // Trunc(ctpop(bitcast <n x i1> to iN)).
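// For illustration, a sketch for a hypothetical zero-extended <8 x i1> %v:
//   %e = zext <8 x i1> %v to <8 x i32>
//   %r = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %e)
// becomes
//   %b = bitcast <8 x i1> %v to i8
//   %c = call i8 @llvm.ctpop.i8(i8 %b)
//   %r = zext i8 %c to i32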
  2426. Value *Arg = II->getArgOperand(0);
  2427. Value *Vect;
  2428. if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
  2429. if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
  2430. if (FTy->getElementType() == Builder.getInt1Ty()) {
  2431. Value *V = Builder.CreateBitCast(
  2432. Vect, Builder.getIntNTy(FTy->getNumElements()));
  2433. Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
  2434. if (Res->getType() != II->getType())
  2435. Res = Builder.CreateZExtOrTrunc(Res, II->getType());
  2436. if (Arg != Vect &&
  2437. cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
  2438. Res = Builder.CreateNeg(Res);
  2439. return replaceInstUsesWith(CI, Res);
  2440. }
  2441. }
  2442. }
  2443. [[fallthrough]];
  2444. }
  2445. case Intrinsic::vector_reduce_xor: {
  2446. if (IID == Intrinsic::vector_reduce_xor) {
  2447. // Exclusive disjunction reduction over the vector with
  2448. // (potentially-extended) i1 element type is actually a
  2449. // (potentially-extended) arithmetic `add` reduction over the original
  2450. // non-extended value:
  2451. // vector_reduce_xor(?ext(<n x i1>))
  2452. // -->
  2453. // ?ext(vector_reduce_add(<n x i1>))
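// For illustration, a sketch for a hypothetical sign-extended <4 x i1> %v:
//   %e = sext <4 x i1> %v to <4 x i32>
//   %r = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %e)
// becomes
//   %a = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %v)
//   %r = sext i1 %a to i32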
  2454. Value *Arg = II->getArgOperand(0);
  2455. Value *Vect;
  2456. if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
  2457. if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
  2458. if (FTy->getElementType() == Builder.getInt1Ty()) {
  2459. Value *Res = Builder.CreateAddReduce(Vect);
  2460. if (Arg != Vect)
  2461. Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
  2462. II->getType());
  2463. return replaceInstUsesWith(CI, Res);
  2464. }
  2465. }
  2466. }
  2467. [[fallthrough]];
  2468. }
  2469. case Intrinsic::vector_reduce_mul: {
  2470. if (IID == Intrinsic::vector_reduce_mul) {
  2471. // Multiplicative reduction over the vector with (potentially-extended)
  2472. // i1 element type is actually a (potentially zero-extended)
  2473. // logical `and` reduction over the original non-extended value:
  2474. // vector_reduce_mul(?ext(<n x i1>))
  2475. // -->
  2476. // zext(vector_reduce_and(<n x i1>))
  2477. Value *Arg = II->getArgOperand(0);
  2478. Value *Vect;
  2479. if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
  2480. if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
  2481. if (FTy->getElementType() == Builder.getInt1Ty()) {
  2482. Value *Res = Builder.CreateAndReduce(Vect);
  2483. if (Res->getType() != II->getType())
  2484. Res = Builder.CreateZExt(Res, II->getType());
  2485. return replaceInstUsesWith(CI, Res);
  2486. }
  2487. }
  2488. }
  2489. [[fallthrough]];
  2490. }
  2491. case Intrinsic::vector_reduce_umin:
  2492. case Intrinsic::vector_reduce_umax: {
  2493. if (IID == Intrinsic::vector_reduce_umin ||
  2494. IID == Intrinsic::vector_reduce_umax) {
  2495. // UMin/UMax reduction over the vector with (potentially-extended)
  2496. // i1 element type is actually a (potentially-extended)
  2497. // logical `and`/`or` reduction over the original non-extended value:
  2498. // vector_reduce_u{min,max}(?ext(<n x i1>))
  2499. // -->
  2500. // ?ext(vector_reduce_{and,or}(<n x i1>))
  2501. Value *Arg = II->getArgOperand(0);
  2502. Value *Vect;
  2503. if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
  2504. if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
  2505. if (FTy->getElementType() == Builder.getInt1Ty()) {
  2506. Value *Res = IID == Intrinsic::vector_reduce_umin
  2507. ? Builder.CreateAndReduce(Vect)
  2508. : Builder.CreateOrReduce(Vect);
  2509. if (Arg != Vect)
  2510. Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
  2511. II->getType());
  2512. return replaceInstUsesWith(CI, Res);
  2513. }
  2514. }
  2515. }
  2516. [[fallthrough]];
  2517. }
  2518. case Intrinsic::vector_reduce_smin:
  2519. case Intrinsic::vector_reduce_smax: {
  2520. if (IID == Intrinsic::vector_reduce_smin ||
  2521. IID == Intrinsic::vector_reduce_smax) {
  2522. // SMin/SMax reduction over the vector with (potentially-extended)
  2523. // i1 element type is actually a (potentially-extended)
  2524. // logical `and`/`or` reduction over the original non-extended value:
  2525. // vector_reduce_s{min,max}(<n x i1>)
  2526. // -->
  2527. // vector_reduce_{or,and}(<n x i1>)
  2528. // and
  2529. // vector_reduce_s{min,max}(sext(<n x i1>))
  2530. // -->
  2531. // sext(vector_reduce_{or,and}(<n x i1>))
  2532. // and
  2533. // vector_reduce_s{min,max}(zext(<n x i1>))
  2534. // -->
  2535. // zext(vector_reduce_{and,or}(<n x i1>))
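// Rationale: with sext (or no extension) a true lane is -1, the signed
// minimum, so smin maps to an 'and'/'or' choice of 'or' reduction and smax to
// an 'and' reduction; with zext a true lane is 1, the signed maximum of
// {0, 1}, so the mapping flips.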
  2536. Value *Arg = II->getArgOperand(0);
  2537. Value *Vect;
  2538. if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
  2539. if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
  2540. if (FTy->getElementType() == Builder.getInt1Ty()) {
  2541. Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
  2542. if (Arg != Vect)
  2543. ExtOpc = cast<CastInst>(Arg)->getOpcode();
  2544. Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
  2545. (ExtOpc == Instruction::CastOps::ZExt))
  2546. ? Builder.CreateAndReduce(Vect)
  2547. : Builder.CreateOrReduce(Vect);
  2548. if (Arg != Vect)
  2549. Res = Builder.CreateCast(ExtOpc, Res, II->getType());
  2550. return replaceInstUsesWith(CI, Res);
  2551. }
  2552. }
  2553. }
  2554. [[fallthrough]];
  2555. }
  2556. case Intrinsic::vector_reduce_fmax:
  2557. case Intrinsic::vector_reduce_fmin:
  2558. case Intrinsic::vector_reduce_fadd:
  2559. case Intrinsic::vector_reduce_fmul: {
  2560. bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
  2561. IID != Intrinsic::vector_reduce_fmul) ||
  2562. II->hasAllowReassoc();
  2563. const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
  2564. IID == Intrinsic::vector_reduce_fmul)
  2565. ? 1
  2566. : 0;
  2567. Value *Arg = II->getArgOperand(ArgIdx);
  2568. Value *V;
  2569. ArrayRef<int> Mask;
  2570. if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
  2571. !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
  2572. !cast<ShuffleVectorInst>(Arg)->isSingleSource())
  2573. break;
  2574. int Sz = Mask.size();
  2575. SmallBitVector UsedIndices(Sz);
  2576. for (int Idx : Mask) {
  2577. if (Idx == UndefMaskElem || UsedIndices.test(Idx))
  2578. break;
  2579. UsedIndices.set(Idx);
  2580. }
2581. // The shuffle can be removed iff it merely permutes the elements, with
2582. // no repeats, undefs, or other changes.
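// For illustration, a sketch with a hypothetical <4 x float> %v and a pure
// permutation mask:
//   %s = shufflevector <4 x float> %v, <4 x float> poison,
//                      <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %s)
// can use %v directly:
//   %r = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %v)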
  2583. if (UsedIndices.all()) {
  2584. replaceUse(II->getOperandUse(ArgIdx), V);
  2585. return nullptr;
  2586. }
  2587. break;
  2588. }
  2589. default: {
  2590. // Handle target specific intrinsics
  2591. std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
  2592. if (V)
  2593. return *V;
  2594. break;
  2595. }
  2596. }
  2597. if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
  2598. return Shuf;
  2599. // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
  2600. // context, so it is handled in visitCallBase and we should trigger it.
  2601. return visitCallBase(*II);
  2602. }
  2603. // Fence instruction simplification
  2604. Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
  2605. auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
  2606. // This check is solely here to handle arbitrary target-dependent syncscopes.
2607. // TODO: Can be removed if this does not matter in practice.
  2608. if (NFI && FI.isIdenticalTo(NFI))
  2609. return eraseInstFromFunction(FI);
2610. // Returns true if FI1 is an identical or stronger fence than FI2.
  2611. auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
  2612. auto FI1SyncScope = FI1->getSyncScopeID();
  2613. // Consider same scope, where scope is global or single-thread.
  2614. if (FI1SyncScope != FI2->getSyncScopeID() ||
  2615. (FI1SyncScope != SyncScope::System &&
  2616. FI1SyncScope != SyncScope::SingleThread))
  2617. return false;
  2618. return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
  2619. };
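// For illustration: in the pair below the second fence is weaker than (and in
// the same scope as) the first, so it can be removed:
//   fence seq_cst
//   fence acquire    ; redundant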
  2620. if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
  2621. return eraseInstFromFunction(FI);
  2622. if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
  2623. if (isIdenticalOrStrongerFence(PFI, &FI))
  2624. return eraseInstFromFunction(FI);
  2625. return nullptr;
  2626. }
  2627. // InvokeInst simplification
  2628. Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
  2629. return visitCallBase(II);
  2630. }
  2631. // CallBrInst simplification
  2632. Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
  2633. return visitCallBase(CBI);
  2634. }
  2635. /// If this cast does not affect the value passed through the varargs area, we
  2636. /// can eliminate the use of the cast.
  2637. static bool isSafeToEliminateVarargsCast(const CallBase &Call,
  2638. const DataLayout &DL,
  2639. const CastInst *const CI,
  2640. const int ix) {
  2641. if (!CI->isLosslessCast())
  2642. return false;
  2643. // If this is a GC intrinsic, avoid munging types. We need types for
  2644. // statepoint reconstruction in SelectionDAG.
  2645. // TODO: This is probably something which should be expanded to all
  2646. // intrinsics since the entire point of intrinsics is that
  2647. // they are understandable by the optimizer.
  2648. if (isa<GCStatepointInst>(Call) || isa<GCRelocateInst>(Call) ||
  2649. isa<GCResultInst>(Call))
  2650. return false;
  2651. // Opaque pointers are compatible with any byval types.
  2652. PointerType *SrcTy = cast<PointerType>(CI->getOperand(0)->getType());
  2653. if (SrcTy->isOpaque())
  2654. return true;
  2655. // The size of ByVal or InAlloca arguments is derived from the type, so we
  2656. // can't change to a type with a different size. If the size were
  2657. // passed explicitly we could avoid this check.
  2658. if (!Call.isPassPointeeByValueArgument(ix))
  2659. return true;
  2660. // The transform currently only handles type replacement for byval, not other
  2661. // type-carrying attributes.
  2662. if (!Call.isByValArgument(ix))
  2663. return false;
  2664. Type *SrcElemTy = SrcTy->getNonOpaquePointerElementType();
  2665. Type *DstElemTy = Call.getParamByValType(ix);
  2666. if (!SrcElemTy->isSized() || !DstElemTy->isSized())
  2667. return false;
  2668. if (DL.getTypeAllocSize(SrcElemTy) != DL.getTypeAllocSize(DstElemTy))
  2669. return false;
  2670. return true;
  2671. }
  2672. Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
  2673. if (!CI->getCalledFunction()) return nullptr;
  2674. // Skip optimizing notail and musttail calls so
  2675. // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
2676. // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
  2677. if (CI->isMustTailCall() || CI->isNoTailCall())
  2678. return nullptr;
  2679. auto InstCombineRAUW = [this](Instruction *From, Value *With) {
  2680. replaceInstUsesWith(*From, With);
  2681. };
  2682. auto InstCombineErase = [this](Instruction *I) {
  2683. eraseInstFromFunction(*I);
  2684. };
  2685. LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
  2686. InstCombineErase);
  2687. if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
  2688. ++NumSimplified;
  2689. return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
  2690. }
  2691. return nullptr;
  2692. }
  2693. static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
  2694. // Strip off at most one level of pointer casts, looking for an alloca. This
  2695. // is good enough in practice and simpler than handling any number of casts.
  2696. Value *Underlying = TrampMem->stripPointerCasts();
  2697. if (Underlying != TrampMem &&
  2698. (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
  2699. return nullptr;
  2700. if (!isa<AllocaInst>(Underlying))
  2701. return nullptr;
  2702. IntrinsicInst *InitTrampoline = nullptr;
  2703. for (User *U : TrampMem->users()) {
  2704. IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
  2705. if (!II)
  2706. return nullptr;
  2707. if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
  2708. if (InitTrampoline)
  2709. // More than one init_trampoline writes to this value. Give up.
  2710. return nullptr;
  2711. InitTrampoline = II;
  2712. continue;
  2713. }
  2714. if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
  2715. // Allow any number of calls to adjust.trampoline.
  2716. continue;
  2717. return nullptr;
  2718. }
  2719. // No call to init.trampoline found.
  2720. if (!InitTrampoline)
  2721. return nullptr;
  2722. // Check that the alloca is being used in the expected way.
  2723. if (InitTrampoline->getOperand(0) != TrampMem)
  2724. return nullptr;
  2725. return InitTrampoline;
  2726. }
  2727. static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
  2728. Value *TrampMem) {
  2729. // Visit all the previous instructions in the basic block, and try to find a
  2730. // init.trampoline which has a direct path to the adjust.trampoline.
  2731. for (BasicBlock::iterator I = AdjustTramp->getIterator(),
  2732. E = AdjustTramp->getParent()->begin();
  2733. I != E;) {
  2734. Instruction *Inst = &*--I;
2735. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
  2736. if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
  2737. II->getOperand(0) == TrampMem)
  2738. return II;
  2739. if (Inst->mayWriteToMemory())
  2740. return nullptr;
  2741. }
  2742. return nullptr;
  2743. }
  2744. // Given a call to llvm.adjust.trampoline, find and return the corresponding
  2745. // call to llvm.init.trampoline if the call to the trampoline can be optimized
  2746. // to a direct call to a function. Otherwise return NULL.
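// For illustration, a sketch of the pattern (hypothetical names; typed
// pointers and casts shown loosely for readability):
//   %tramp = alloca [10 x i8], align 16
//   call void @llvm.init.trampoline(i8* %tramp, i8* @f, i8* %nest)
//   %p  = call i8* @llvm.adjust.trampoline(i8* %tramp)
//   %fp = bitcast i8* %p to void (i32)*
//   call void %fp(i32 0)  ; can become a direct call to @f with %nest spliced in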
  2747. static IntrinsicInst *findInitTrampoline(Value *Callee) {
  2748. Callee = Callee->stripPointerCasts();
  2749. IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
  2750. if (!AdjustTramp ||
  2751. AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
  2752. return nullptr;
  2753. Value *TrampMem = AdjustTramp->getOperand(0);
  2754. if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
  2755. return IT;
  2756. if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
  2757. return IT;
  2758. return nullptr;
  2759. }
  2760. bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
  2761. const TargetLibraryInfo *TLI) {
  2762. // Note: We only handle cases which can't be driven from generic attributes
  2763. // here. So, for example, nonnull and noalias (which are common properties
  2764. // of some allocation functions) are expected to be handled via annotation
  2765. // of the respective allocator declaration with generic attributes.
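// For illustration, assuming a hypothetical allocator @my_alloc whose size and
// alignment arguments are visible via getAllocSize/getAllocAlignment:
//   %p = call i8* @my_alloc(i64 64, i64 128)
// can be annotated (absent an existing nonnull return) as
//   %p = call align 128 dereferenceable_or_null(64) i8* @my_alloc(i64 64, i64 128)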
  2766. bool Changed = false;
  2767. if (!Call.getType()->isPointerTy())
  2768. return Changed;
  2769. std::optional<APInt> Size = getAllocSize(&Call, TLI);
  2770. if (Size && *Size != 0) {
  2771. // TODO: We really should just emit deref_or_null here and then
  2772. // let the generic inference code combine that with nonnull.
  2773. if (Call.hasRetAttr(Attribute::NonNull)) {
  2774. Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
  2775. Call.addRetAttr(Attribute::getWithDereferenceableBytes(
  2776. Call.getContext(), Size->getLimitedValue()));
  2777. } else {
  2778. Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
  2779. Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
  2780. Call.getContext(), Size->getLimitedValue()));
  2781. }
  2782. }
  2783. // Add alignment attribute if alignment is a power of two constant.
  2784. Value *Alignment = getAllocAlignment(&Call, TLI);
  2785. if (!Alignment)
  2786. return Changed;
  2787. ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
  2788. if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
  2789. uint64_t AlignmentVal = AlignOpC->getZExtValue();
  2790. if (llvm::isPowerOf2_64(AlignmentVal)) {
  2791. Align ExistingAlign = Call.getRetAlign().valueOrOne();
  2792. Align NewAlign = Align(AlignmentVal);
  2793. if (NewAlign > ExistingAlign) {
  2794. Call.addRetAttr(
  2795. Attribute::getWithAlignment(Call.getContext(), NewAlign));
  2796. Changed = true;
  2797. }
  2798. }
  2799. }
  2800. return Changed;
  2801. }
  2802. /// Improvements for call, callbr and invoke instructions.
  2803. Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
  2804. bool Changed = annotateAnyAllocSite(Call, &TLI);
  2805. // Mark any parameters that are known to be non-null with the nonnull
  2806. // attribute. This is helpful for inlining calls to functions with null
  2807. // checks on their arguments.
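// For illustration, with a hypothetical callee @use and a pointer %p known to
// be non-null at the call site:
//   call void @use(i8* %p)
// becomes
//   call void @use(i8* nonnull %p)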
  2808. SmallVector<unsigned, 4> ArgNos;
  2809. unsigned ArgNo = 0;
  2810. for (Value *V : Call.args()) {
  2811. if (V->getType()->isPointerTy() &&
  2812. !Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
  2813. isKnownNonZero(V, DL, 0, &AC, &Call, &DT))
  2814. ArgNos.push_back(ArgNo);
  2815. ArgNo++;
  2816. }
  2817. assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
  2818. if (!ArgNos.empty()) {
  2819. AttributeList AS = Call.getAttributes();
  2820. LLVMContext &Ctx = Call.getContext();
  2821. AS = AS.addParamAttribute(Ctx, ArgNos,
  2822. Attribute::get(Ctx, Attribute::NonNull));
  2823. Call.setAttributes(AS);
  2824. Changed = true;
  2825. }
  2826. // If the callee is a pointer to a function, attempt to move any casts to the
  2827. // arguments of the call/callbr/invoke.
  2828. Value *Callee = Call.getCalledOperand();
  2829. Function *CalleeF = dyn_cast<Function>(Callee);
  2830. if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
  2831. transformConstExprCastCall(Call))
  2832. return nullptr;
  2833. if (CalleeF) {
  2834. // Remove the convergent attr on calls when the callee is not convergent.
  2835. if (Call.isConvergent() && !CalleeF->isConvergent() &&
  2836. !CalleeF->isIntrinsic()) {
  2837. LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
  2838. << "\n");
  2839. Call.setNotConvergent();
  2840. return &Call;
  2841. }
2842. // If the call and callee calling conventions don't match, and neither one
2843. // of the calling conventions is compatible with the C calling convention,
2844. // this call must be unreachable, as the call is undefined.
  2845. if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
  2846. !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
  2847. TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
  2848. !(Call.getCallingConv() == llvm::CallingConv::C &&
  2849. TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
  2850. // Only do this for calls to a function with a body. A prototype may
  2851. // not actually end up matching the implementation's calling conv for a
  2852. // variety of reasons (e.g. it may be written in assembly).
  2853. !CalleeF->isDeclaration()) {
  2854. Instruction *OldCall = &Call;
  2855. CreateNonTerminatorUnreachable(OldCall);
  2856. // If OldCall does not return void then replaceInstUsesWith poison.
2857. // This allows ValueHandlers and custom metadata to adjust themselves.
  2858. if (!OldCall->getType()->isVoidTy())
  2859. replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
  2860. if (isa<CallInst>(OldCall))
  2861. return eraseInstFromFunction(*OldCall);
2862. // We cannot remove an invoke or a callbr, because it would change the
  2863. // CFG, just change the callee to a null pointer.
  2864. cast<CallBase>(OldCall)->setCalledFunction(
  2865. CalleeF->getFunctionType(),
  2866. Constant::getNullValue(CalleeF->getType()));
  2867. return nullptr;
  2868. }
  2869. }
  2870. // Calling a null function pointer is undefined if a null address isn't
  2871. // dereferenceable.
  2872. if ((isa<ConstantPointerNull>(Callee) &&
  2873. !NullPointerIsDefined(Call.getFunction())) ||
  2874. isa<UndefValue>(Callee)) {
  2875. // If Call does not return void then replaceInstUsesWith poison.
2876. // This allows ValueHandlers and custom metadata to adjust themselves.
  2877. if (!Call.getType()->isVoidTy())
  2878. replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));
  2879. if (Call.isTerminator()) {
  2880. // Can't remove an invoke or callbr because we cannot change the CFG.
  2881. return nullptr;
  2882. }
  2883. // This instruction is not reachable, just remove it.
  2884. CreateNonTerminatorUnreachable(&Call);
  2885. return eraseInstFromFunction(Call);
  2886. }
  2887. if (IntrinsicInst *II = findInitTrampoline(Callee))
  2888. return transformCallThroughTrampoline(Call, *II);
  2889. // TODO: Drop this transform once opaque pointer transition is done.
  2890. FunctionType *FTy = Call.getFunctionType();
  2891. if (FTy->isVarArg()) {
  2892. int ix = FTy->getNumParams();
  2893. // See if we can optimize any arguments passed through the varargs area of
  2894. // the call.
  2895. for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end();
  2896. I != E; ++I, ++ix) {
  2897. CastInst *CI = dyn_cast<CastInst>(*I);
  2898. if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
  2899. replaceUse(*I, CI->getOperand(0));
  2900. // Update the byval type to match the pointer type.
  2901. // Not necessary for opaque pointers.
  2902. PointerType *NewTy = cast<PointerType>(CI->getOperand(0)->getType());
  2903. if (!NewTy->isOpaque() && Call.isByValArgument(ix)) {
  2904. Call.removeParamAttr(ix, Attribute::ByVal);
  2905. Call.addParamAttr(ix, Attribute::getWithByValType(
  2906. Call.getContext(),
  2907. NewTy->getNonOpaquePointerElementType()));
  2908. }
  2909. Changed = true;
  2910. }
  2911. }
  2912. }
  2913. if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
  2914. InlineAsm *IA = cast<InlineAsm>(Callee);
  2915. if (!IA->canThrow()) {
  2916. // Normal inline asm calls cannot throw - mark them
  2917. // 'nounwind'.
  2918. Call.setDoesNotThrow();
  2919. Changed = true;
  2920. }
  2921. }
2922. // Try to optimize the call if possible; we require DataLayout for most of
2923. // this. None of these calls are seen as possibly dead, so go ahead and
2924. // delete the instruction now.
  2925. if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
  2926. Instruction *I = tryOptimizeCall(CI);
2927. // If we changed something, return the result, etc. Otherwise, fall
2928. // through to the checks below.
  2929. if (I) return eraseInstFromFunction(*I);
  2930. }
  2931. if (!Call.use_empty() && !Call.isMustTailCall())
  2932. if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
  2933. Type *CallTy = Call.getType();
  2934. Type *RetArgTy = ReturnedArg->getType();
  2935. if (RetArgTy->canLosslesslyBitCastTo(CallTy))
  2936. return replaceInstUsesWith(
  2937. Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
  2938. }
  2939. // Drop unnecessary kcfi operand bundles from calls that were converted
  2940. // into direct calls.
  2941. auto Bundle = Call.getOperandBundle(LLVMContext::OB_kcfi);
  2942. if (Bundle && !Call.isIndirectCall()) {
  2943. DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
  2944. if (CalleeF) {
  2945. ConstantInt *FunctionType = nullptr;
  2946. ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
  2947. if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
  2948. FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
  2949. if (FunctionType &&
  2950. FunctionType->getZExtValue() != ExpectedType->getZExtValue())
  2951. dbgs() << Call.getModule()->getName()
  2952. << ": warning: kcfi: " << Call.getCaller()->getName()
  2953. << ": call to " << CalleeF->getName()
  2954. << " using a mismatching function pointer type\n";
  2955. }
  2956. });
  2957. return CallBase::removeOperandBundle(&Call, LLVMContext::OB_kcfi);
  2958. }
  2959. if (isRemovableAlloc(&Call, &TLI))
  2960. return visitAllocSite(Call);
  2961. // Handle intrinsics which can be used in both call and invoke context.
  2962. switch (Call.getIntrinsicID()) {
  2963. case Intrinsic::experimental_gc_statepoint: {
  2964. GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
  2965. SmallPtrSet<Value *, 32> LiveGcValues;
  2966. for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
  2967. GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
  2968. // Remove the relocation if unused.
  2969. if (GCR.use_empty()) {
  2970. eraseInstFromFunction(GCR);
  2971. continue;
  2972. }
  2973. Value *DerivedPtr = GCR.getDerivedPtr();
  2974. Value *BasePtr = GCR.getBasePtr();
  2975. // Undef is undef, even after relocation.
  2976. if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
  2977. replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
  2978. eraseInstFromFunction(GCR);
  2979. continue;
  2980. }
  2981. if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
2982. // The relocation of null will be null for almost any collector.
  2983. // TODO: provide a hook for this in GCStrategy. There might be some
  2984. // weird collector this property does not hold for.
  2985. if (isa<ConstantPointerNull>(DerivedPtr)) {
  2986. // Use null-pointer of gc_relocate's type to replace it.
  2987. replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
  2988. eraseInstFromFunction(GCR);
  2989. continue;
  2990. }
  2991. // isKnownNonNull -> nonnull attribute
  2992. if (!GCR.hasRetAttr(Attribute::NonNull) &&
  2993. isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) {
  2994. GCR.addRetAttr(Attribute::NonNull);
2995. // We discovered a new fact, so re-check the users.
  2996. Worklist.pushUsersToWorkList(GCR);
  2997. }
  2998. }
  2999. // If we have two copies of the same pointer in the statepoint argument
  3000. // list, canonicalize to one. This may let us common gc.relocates.
  3001. if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
  3002. GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
  3003. auto *OpIntTy = GCR.getOperand(2)->getType();
  3004. GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
  3005. }
  3006. // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
  3007. // Canonicalize on the type from the uses to the defs
  3008. // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
  3009. LiveGcValues.insert(BasePtr);
  3010. LiveGcValues.insert(DerivedPtr);
  3011. }
  3012. std::optional<OperandBundleUse> Bundle =
  3013. GCSP.getOperandBundle(LLVMContext::OB_gc_live);
  3014. unsigned NumOfGCLives = LiveGcValues.size();
  3015. if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
  3016. break;
3017. // We can reduce the size of the gc-live bundle.
  3018. DenseMap<Value *, unsigned> Val2Idx;
  3019. std::vector<Value *> NewLiveGc;
  3020. for (Value *V : Bundle->Inputs) {
  3021. if (Val2Idx.count(V))
  3022. continue;
  3023. if (LiveGcValues.count(V)) {
  3024. Val2Idx[V] = NewLiveGc.size();
  3025. NewLiveGc.push_back(V);
  3026. } else
  3027. Val2Idx[V] = NumOfGCLives;
  3028. }
  3029. // Update all gc.relocates
  3030. for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
  3031. GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
  3032. Value *BasePtr = GCR.getBasePtr();
  3033. assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
  3034. "Missed live gc for base pointer");
  3035. auto *OpIntTy1 = GCR.getOperand(1)->getType();
  3036. GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
  3037. Value *DerivedPtr = GCR.getDerivedPtr();
  3038. assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
  3039. "Missed live gc for derived pointer");
  3040. auto *OpIntTy2 = GCR.getOperand(2)->getType();
  3041. GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
  3042. }
  3043. // Create new statepoint instruction.
  3044. OperandBundleDef NewBundle("gc-live", NewLiveGc);
  3045. return CallBase::Create(&Call, NewBundle);
  3046. }
  3047. default: { break; }
  3048. }
  3049. return Changed ? &Call : nullptr;
  3050. }
  3051. /// If the callee is a constexpr cast of a function, attempt to move the cast to
  3052. /// the arguments of the call/callbr/invoke.
  3053. bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
  3054. auto *Callee =
  3055. dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
  3056. if (!Callee)
  3057. return false;
  3058. // If this is a call to a thunk function, don't remove the cast. Thunks are
  3059. // used to transparently forward all incoming parameters and outgoing return
  3060. // values, so it's important to leave the cast in place.
  3061. if (Callee->hasFnAttribute("thunk"))
  3062. return false;
  3063. // If this is a musttail call, the callee's prototype must match the caller's
  3064. // prototype with the exception of pointee types. The code below doesn't
  3065. // implement that, so we can't do this transform.
  3066. // TODO: Do the transform if it only requires adding pointer casts.
  3067. if (Call.isMustTailCall())
  3068. return false;
  3069. Instruction *Caller = &Call;
  3070. const AttributeList &CallerPAL = Call.getAttributes();
  3071. // Okay, this is a cast from a function to a different type. Unless doing so
  3072. // would cause a type conversion of one of our arguments, change this call to
  3073. // be a direct call with arguments casted to the appropriate types.
  3074. FunctionType *FT = Callee->getFunctionType();
  3075. Type *OldRetTy = Caller->getType();
  3076. Type *NewRetTy = FT->getReturnType();
  3077. // Check to see if we are changing the return type...
  3078. if (OldRetTy != NewRetTy) {
  3079. if (NewRetTy->isStructTy())
  3080. return false; // TODO: Handle multiple return values.
  3081. if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
  3082. if (Callee->isDeclaration())
  3083. return false; // Cannot transform this return value.
  3084. if (!Caller->use_empty() &&
  3085. // void -> non-void is handled specially
  3086. !NewRetTy->isVoidTy())
  3087. return false; // Cannot transform this return value.
  3088. }
  3089. if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
  3090. AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
  3091. if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
  3092. return false; // Attribute not compatible with transformed value.
  3093. }
  3094. // If the callbase is an invoke/callbr instruction, and the return value is
  3095. // used by a PHI node in a successor, we cannot change the return type of
  3096. // the call because there is no place to put the cast instruction (without
  3097. // breaking the critical edge). Bail out in this case.
  3098. if (!Caller->use_empty()) {
  3099. BasicBlock *PhisNotSupportedBlock = nullptr;
  3100. if (auto *II = dyn_cast<InvokeInst>(Caller))
  3101. PhisNotSupportedBlock = II->getNormalDest();
  3102. if (auto *CB = dyn_cast<CallBrInst>(Caller))
  3103. PhisNotSupportedBlock = CB->getDefaultDest();
  3104. if (PhisNotSupportedBlock)
  3105. for (User *U : Caller->users())
  3106. if (PHINode *PN = dyn_cast<PHINode>(U))
  3107. if (PN->getParent() == PhisNotSupportedBlock)
  3108. return false;
  3109. }
  3110. }
  3111. unsigned NumActualArgs = Call.arg_size();
  3112. unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
  3113. // Prevent us turning:
  3114. // declare void @takes_i32_inalloca(i32* inalloca)
  3115. // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
  3116. //
  3117. // into:
  3118. // call void @takes_i32_inalloca(i32* null)
  3119. //
  3120. // Similarly, avoid folding away bitcasts of byval calls.
  3121. if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
  3122. Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
  3123. return false;
  3124. auto AI = Call.arg_begin();
  3125. for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
  3126. Type *ParamTy = FT->getParamType(i);
  3127. Type *ActTy = (*AI)->getType();
  3128. if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
  3129. return false; // Cannot transform this parameter value.
  3130. // Check if there are any incompatible attributes we cannot drop safely.
  3131. if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
  3132. .overlaps(AttributeFuncs::typeIncompatible(
  3133. ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP)))
  3134. return false; // Attribute not compatible with transformed value.
  3135. if (Call.isInAllocaArgument(i) ||
  3136. CallerPAL.hasParamAttr(i, Attribute::Preallocated))
  3137. return false; // Cannot transform to and from inalloca/preallocated.
  3138. if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
  3139. return false;
  3140. if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
  3141. Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
  3142. return false; // Cannot transform to or from byval.
  3143. // If the parameter is passed as a byval argument, then we have to have a
  3144. // sized type and the sized type has to have the same size as the old type.
  3145. if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
  3146. PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
  3147. if (!ParamPTy)
  3148. return false;
  3149. if (!ParamPTy->isOpaque()) {
  3150. Type *ParamElTy = ParamPTy->getNonOpaquePointerElementType();
  3151. if (!ParamElTy->isSized())
  3152. return false;
  3153. Type *CurElTy = Call.getParamByValType(i);
  3154. if (DL.getTypeAllocSize(CurElTy) != DL.getTypeAllocSize(ParamElTy))
  3155. return false;
  3156. }
  3157. }
  3158. }
  3159. if (Callee->isDeclaration()) {
  3160. // Do not delete arguments unless we have a function body.
  3161. if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
  3162. return false;
  3163. // If the callee is just a declaration, don't change the varargsness of the
  3164. // call. We don't want to introduce a varargs call where one doesn't
  3165. // already exist.
  3166. if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
  3167. return false;
  3168. // If both the callee and the cast type are varargs, we still have to make
  3169. // sure the number of fixed parameters are the same or we have the same
  3170. // ABI issues as if we introduce a varargs call.
  3171. if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
  3172. FT->getNumParams() != Call.getFunctionType()->getNumParams())
  3173. return false;
  3174. }
  3175. if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
  3176. !CallerPAL.isEmpty()) {
  3177. // In this case we have more arguments than the new function type, but we
  3178. // won't be dropping them. Check that these extra arguments have attributes
  3179. // that are compatible with being a vararg call argument.
  3180. unsigned SRetIdx;
  3181. if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
  3182. SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
  3183. return false;
  3184. }
  3185. // Okay, we decided that this is a safe thing to do: go ahead and start
  3186. // inserting cast instructions as necessary.
  3187. SmallVector<Value *, 8> Args;
  3188. SmallVector<AttributeSet, 8> ArgAttrs;
  3189. Args.reserve(NumActualArgs);
  3190. ArgAttrs.reserve(NumActualArgs);
  3191. // Get any return attributes.
  3192. AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
  3193. // If the return value is not being used, the type may not be compatible
  3194. // with the existing attributes. Wipe out any problematic attributes.
  3195. RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
  3196. LLVMContext &Ctx = Call.getContext();
  3197. AI = Call.arg_begin();
  3198. for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
  3199. Type *ParamTy = FT->getParamType(i);
  3200. Value *NewArg = *AI;
  3201. if ((*AI)->getType() != ParamTy)
  3202. NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
  3203. Args.push_back(NewArg);
  3204. // Add any parameter attributes except the ones incompatible with the new
  3205. // type. Note that we made sure all incompatible ones are safe to drop.
  3206. AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
  3207. ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
  3208. if (CallerPAL.hasParamAttr(i, Attribute::ByVal) &&
  3209. !ParamTy->isOpaquePointerTy()) {
  3210. AttrBuilder AB(Ctx, CallerPAL.getParamAttrs(i).removeAttributes(
  3211. Ctx, IncompatibleAttrs));
  3212. AB.addByValAttr(ParamTy->getNonOpaquePointerElementType());
  3213. ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
  3214. } else {
  3215. ArgAttrs.push_back(
  3216. CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
  3217. }
  3218. }
  3219. // If the function takes more arguments than the call was taking, add them
  3220. // now.
  3221. for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
  3222. Args.push_back(Constant::getNullValue(FT->getParamType(i)));
  3223. ArgAttrs.push_back(AttributeSet());
  3224. }
  3225. // If we are removing arguments to the function, emit an obnoxious warning.
  3226. if (FT->getNumParams() < NumActualArgs) {
  3227. // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
  3228. if (FT->isVarArg()) {
  3229. // Add all of the arguments in their promoted form to the arg list.
  3230. for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
  3231. Type *PTy = getPromotedType((*AI)->getType());
  3232. Value *NewArg = *AI;
  3233. if (PTy != (*AI)->getType()) {
  3234. // Must promote to pass through va_arg area!
  3235. Instruction::CastOps opcode =
  3236. CastInst::getCastOpcode(*AI, false, PTy, false);
  3237. NewArg = Builder.CreateCast(opcode, *AI, PTy);
  3238. }
  3239. Args.push_back(NewArg);
  3240. // Add any parameter attributes.
  3241. ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
  3242. }
  3243. }
  3244. }
  3245. AttributeSet FnAttrs = CallerPAL.getFnAttrs();
  3246. if (NewRetTy->isVoidTy())
  3247. Caller->setName(""); // Void type should not have a name.
  3248. assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
  3249. "missing argument attributes");
  3250. AttributeList NewCallerPAL = AttributeList::get(
  3251. Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
  3252. SmallVector<OperandBundleDef, 1> OpBundles;
  3253. Call.getOperandBundlesAsDefs(OpBundles);
  3254. CallBase *NewCall;
  3255. if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
  3256. NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
  3257. II->getUnwindDest(), Args, OpBundles);
  3258. } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
  3259. NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(),
  3260. CBI->getIndirectDests(), Args, OpBundles);
  3261. } else {
  3262. NewCall = Builder.CreateCall(Callee, Args, OpBundles);
  3263. cast<CallInst>(NewCall)->setTailCallKind(
  3264. cast<CallInst>(Caller)->getTailCallKind());
  3265. }
  3266. NewCall->takeName(Caller);
  3267. NewCall->setCallingConv(Call.getCallingConv());
  3268. NewCall->setAttributes(NewCallerPAL);
  3269. // Preserve prof metadata if any.
  3270. NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
  3271. // Insert a cast of the return type as necessary.
  3272. Instruction *NC = NewCall;
  3273. Value *NV = NC;
  3274. if (OldRetTy != NV->getType() && !Caller->use_empty()) {
  3275. if (!NV->getType()->isVoidTy()) {
  3276. NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
  3277. NC->setDebugLoc(Caller->getDebugLoc());
  3278. Instruction *InsertPt = NewCall->getInsertionPointAfterDef();
  3279. assert(InsertPt && "No place to insert cast");
  3280. InsertNewInstBefore(NC, *InsertPt);
  3281. Worklist.pushUsersToWorkList(*Caller);
  3282. } else {
  3283. NV = PoisonValue::get(Caller->getType());
  3284. }
  3285. }
  3286. if (!Caller->use_empty())
  3287. replaceInstUsesWith(*Caller, NV);
  3288. else if (Caller->hasValueHandle()) {
  3289. if (OldRetTy == NV->getType())
  3290. ValueHandleBase::ValueIsRAUWd(Caller, NV);
  3291. else
  3292. // We cannot call ValueIsRAUWd with a different type, and the
  3293. // actual tracked value will disappear.
  3294. ValueHandleBase::ValueIsDeleted(Caller);
  3295. }
  3296. eraseInstFromFunction(*Caller);
  3297. return true;
  3298. }
  3299. /// Turn a call to a function created by init_trampoline / adjust_trampoline
  3300. /// intrinsic pair into a direct call to the underlying function.
  3301. Instruction *
  3302. InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
  3303. IntrinsicInst &Tramp) {
  3304. Value *Callee = Call.getCalledOperand();
  3305. Type *CalleeTy = Callee->getType();
  3306. FunctionType *FTy = Call.getFunctionType();
  3307. AttributeList Attrs = Call.getAttributes();
  3308. // If the call already has the 'nest' attribute somewhere then give up -
  3309. // otherwise 'nest' would occur twice after splicing in the chain.
  3310. if (Attrs.hasAttrSomewhere(Attribute::Nest))
  3311. return nullptr;
  3312. Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
  3313. FunctionType *NestFTy = NestF->getFunctionType();
  3314. AttributeList NestAttrs = NestF->getAttributes();
  3315. if (!NestAttrs.isEmpty()) {
  3316. unsigned NestArgNo = 0;
  3317. Type *NestTy = nullptr;
  3318. AttributeSet NestAttr;
  3319. // Look for a parameter marked with the 'nest' attribute.
  3320. for (FunctionType::param_iterator I = NestFTy->param_begin(),
  3321. E = NestFTy->param_end();
  3322. I != E; ++NestArgNo, ++I) {
  3323. AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
  3324. if (AS.hasAttribute(Attribute::Nest)) {
  3325. // Record the parameter type and any other attributes.
  3326. NestTy = *I;
  3327. NestAttr = AS;
  3328. break;
  3329. }
  3330. }
  3331. if (NestTy) {
  3332. std::vector<Value*> NewArgs;
  3333. std::vector<AttributeSet> NewArgAttrs;
  3334. NewArgs.reserve(Call.arg_size() + 1);
  3335. NewArgAttrs.reserve(Call.arg_size());
  3336. // Insert the nest argument into the call argument list, which may
  3337. // mean appending it. Likewise for attributes.
  3338. {
  3339. unsigned ArgNo = 0;
  3340. auto I = Call.arg_begin(), E = Call.arg_end();
  3341. do {
  3342. if (ArgNo == NestArgNo) {
  3343. // Add the chain argument and attributes.
  3344. Value *NestVal = Tramp.getArgOperand(2);
  3345. if (NestVal->getType() != NestTy)
  3346. NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
  3347. NewArgs.push_back(NestVal);
  3348. NewArgAttrs.push_back(NestAttr);
  3349. }
  3350. if (I == E)
  3351. break;
  3352. // Add the original argument and attributes.
  3353. NewArgs.push_back(*I);
  3354. NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
  3355. ++ArgNo;
  3356. ++I;
  3357. } while (true);
  3358. }
  3359. // The trampoline may have been bitcast to a bogus type (FTy).
  3360. // Handle this by synthesizing a new function type, equal to FTy
  3361. // with the chain parameter inserted.
  3362. std::vector<Type*> NewTypes;
  3363. NewTypes.reserve(FTy->getNumParams()+1);
  3364. // Insert the chain's type into the list of parameter types, which may
  3365. // mean appending it.
  3366. {
  3367. unsigned ArgNo = 0;
  3368. FunctionType::param_iterator I = FTy->param_begin(),
  3369. E = FTy->param_end();
  3370. do {
  3371. if (ArgNo == NestArgNo)
  3372. // Add the chain's type.
  3373. NewTypes.push_back(NestTy);
  3374. if (I == E)
  3375. break;
  3376. // Add the original type.
  3377. NewTypes.push_back(*I);
  3378. ++ArgNo;
  3379. ++I;
  3380. } while (true);
  3381. }
  3382. // Replace the trampoline call with a direct call. Let the generic
  3383. // code sort out any function type mismatches.
  3384. FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
  3385. FTy->isVarArg());
  3386. Constant *NewCallee =
  3387. NestF->getType() == PointerType::getUnqual(NewFTy) ?
  3388. NestF : ConstantExpr::getBitCast(NestF,
  3389. PointerType::getUnqual(NewFTy));
  3390. AttributeList NewPAL =
  3391. AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
  3392. Attrs.getRetAttrs(), NewArgAttrs);
  3393. SmallVector<OperandBundleDef, 1> OpBundles;
  3394. Call.getOperandBundlesAsDefs(OpBundles);
  3395. Instruction *NewCaller;
  3396. if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
  3397. NewCaller = InvokeInst::Create(NewFTy, NewCallee,
  3398. II->getNormalDest(), II->getUnwindDest(),
  3399. NewArgs, OpBundles);
  3400. cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
  3401. cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
  3402. } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
  3403. NewCaller =
  3404. CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(),
  3405. CBI->getIndirectDests(), NewArgs, OpBundles);
  3406. cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
  3407. cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
  3408. } else {
  3409. NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles);
  3410. cast<CallInst>(NewCaller)->setTailCallKind(
  3411. cast<CallInst>(Call).getTailCallKind());
  3412. cast<CallInst>(NewCaller)->setCallingConv(
  3413. cast<CallInst>(Call).getCallingConv());
  3414. cast<CallInst>(NewCaller)->setAttributes(NewPAL);
  3415. }
  3416. NewCaller->setDebugLoc(Call.getDebugLoc());
  3417. return NewCaller;
  3418. }
  3419. }
  3420. // Replace the trampoline call with a direct call. Since there is no 'nest'
  3421. // parameter, there is no need to adjust the argument list. Let the generic
  3422. // code sort out any function type mismatches.
  3423. Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy);
  3424. Call.setCalledFunction(FTy, NewCallee);
  3425. return &Call;
  3426. }