  1. //===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines routines for folding instructions into constants.
  10. //
  11. // Also, to supplement the basic IR ConstantExpr simplifications,
  12. // this file defines some additional folding routines that can make use of
  13. // DataLayout information. These functions cannot go in IR due to library
  14. // dependency issues.
  15. //
  16. //===----------------------------------------------------------------------===//
  17. #include "llvm/Analysis/ConstantFolding.h"
  18. #include "llvm/ADT/APFloat.h"
  19. #include "llvm/ADT/APInt.h"
  20. #include "llvm/ADT/APSInt.h"
  21. #include "llvm/ADT/ArrayRef.h"
  22. #include "llvm/ADT/DenseMap.h"
  23. #include "llvm/ADT/STLExtras.h"
  24. #include "llvm/ADT/SmallVector.h"
  25. #include "llvm/ADT/StringRef.h"
  26. #include "llvm/Analysis/TargetFolder.h"
  27. #include "llvm/Analysis/TargetLibraryInfo.h"
  28. #include "llvm/Analysis/ValueTracking.h"
  29. #include "llvm/Analysis/VectorUtils.h"
  30. #include "llvm/Config/config.h"
  31. #include "llvm/IR/Constant.h"
  32. #include "llvm/IR/Constants.h"
  33. #include "llvm/IR/DataLayout.h"
  34. #include "llvm/IR/DerivedTypes.h"
  35. #include "llvm/IR/Function.h"
  36. #include "llvm/IR/GlobalValue.h"
  37. #include "llvm/IR/GlobalVariable.h"
  38. #include "llvm/IR/InstrTypes.h"
  39. #include "llvm/IR/Instruction.h"
  40. #include "llvm/IR/Instructions.h"
  41. #include "llvm/IR/IntrinsicInst.h"
  42. #include "llvm/IR/Intrinsics.h"
  43. #include "llvm/IR/IntrinsicsAArch64.h"
  44. #include "llvm/IR/IntrinsicsAMDGPU.h"
  45. #include "llvm/IR/IntrinsicsARM.h"
  46. #include "llvm/IR/IntrinsicsWebAssembly.h"
  47. #include "llvm/IR/IntrinsicsX86.h"
  48. #include "llvm/IR/Operator.h"
  49. #include "llvm/IR/Type.h"
  50. #include "llvm/IR/Value.h"
  51. #include "llvm/Support/Casting.h"
  52. #include "llvm/Support/ErrorHandling.h"
  53. #include "llvm/Support/KnownBits.h"
  54. #include "llvm/Support/MathExtras.h"
  55. #include <cassert>
  56. #include <cerrno>
  57. #include <cfenv>
  58. #include <cmath>
  59. #include <cstddef>
  60. #include <cstdint>
  61. using namespace llvm;
  62. namespace {
  63. //===----------------------------------------------------------------------===//
  64. // Constant Folding internal helper functions
  65. //===----------------------------------------------------------------------===//
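/// Pack the integer elements of the vector constant C into Result, so that
/// element 0 ends up in the low bits on little-endian targets (per DL) and in
/// the high bits on big-endian targets. Undef elements read as zero. Returns
/// nullptr on success, or a plain bitcast ConstantExpr if some element is
/// neither undef nor a ConstantInt.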
  66. static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
  67. Constant *C, Type *SrcEltTy,
  68. unsigned NumSrcElts,
  69. const DataLayout &DL) {
  70. // Now that we know that the input value is a vector of integers, just shift
  71. // and insert them into our result.
  72. unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
  73. for (unsigned i = 0; i != NumSrcElts; ++i) {
  74. Constant *Element;
  75. if (DL.isLittleEndian())
  76. Element = C->getAggregateElement(NumSrcElts - i - 1);
  77. else
  78. Element = C->getAggregateElement(i);
  79. if (Element && isa<UndefValue>(Element)) {
  80. Result <<= BitShift;
  81. continue;
  82. }
  83. auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
  84. if (!ElementCI)
  85. return ConstantExpr::getBitCast(C, DestTy);
  86. Result <<= BitShift;
  87. Result |= ElementCI->getValue().zextOrSelf(Result.getBitWidth());
  88. }
  89. return nullptr;
  90. }
  91. /// Constant fold bitcast, symbolically evaluating it with DataLayout.
  92. /// This always returns a non-null constant, but it may be a
  93. /// ConstantExpr if unfoldable.
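///
/// For example (an illustrative case, not an exhaustive list): on a
/// little-endian target,
///   bitcast (<2 x i32> <i32 1, i32 2> to i64)
/// folds to i64 0x0000000200000001.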
  94. Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
  95. assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
  96. "Invalid constantexpr bitcast!");
  97. // Catch the obvious splat cases.
  98. if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy))
  99. return Res;
  100. if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
  101. // Handle a vector->scalar integer/fp cast.
  102. if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
  103. unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
  104. Type *SrcEltTy = VTy->getElementType();
  105. // If the vector is a vector of floating-point values, convert it to a vector
  106. // of integers to simplify things.
  107. if (SrcEltTy->isFloatingPointTy()) {
  108. unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
  109. auto *SrcIVTy = FixedVectorType::get(
  110. IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
  111. // Ask IR to do the conversion now that #elts line up.
  112. C = ConstantExpr::getBitCast(C, SrcIVTy);
  113. }
  114. APInt Result(DL.getTypeSizeInBits(DestTy), 0);
  115. if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
  116. SrcEltTy, NumSrcElts, DL))
  117. return CE;
  118. if (isa<IntegerType>(DestTy))
  119. return ConstantInt::get(DestTy, Result);
  120. APFloat FP(DestTy->getFltSemantics(), Result);
  121. return ConstantFP::get(DestTy->getContext(), FP);
  122. }
  123. }
  124. // The code below only handles casts to vectors currently.
  125. auto *DestVTy = dyn_cast<VectorType>(DestTy);
  126. if (!DestVTy)
  127. return ConstantExpr::getBitCast(C, DestTy);
  128. // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
  129. // vector so the code below can handle it uniformly.
  130. if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
  131. Constant *Ops = C; // don't take the address of C!
  132. return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
  133. }
  134. // If this is a bitcast from constant vector -> vector, fold it.
  135. if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
  136. return ConstantExpr::getBitCast(C, DestTy);
  137. // If the element types match, IR can fold it.
  138. unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
  139. unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
  140. if (NumDstElt == NumSrcElt)
  141. return ConstantExpr::getBitCast(C, DestTy);
  142. Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
  143. Type *DstEltTy = DestVTy->getElementType();
  144. // Otherwise, we're changing the number of elements in a vector, which
  145. // requires endianness information to do the right thing. For example,
  146. // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  147. // folds to (little endian):
  148. // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
  149. // and to (big endian):
  150. // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  151. // First things first: we only want to think about integers here, so if
  152. // we have something in FP form, recast it as an integer.
  153. if (DstEltTy->isFloatingPointTy()) {
  154. // Fold to a vector of integers with the same size as our FP type.
  155. unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
  156. auto *DestIVTy = FixedVectorType::get(
  157. IntegerType::get(C->getContext(), FPWidth), NumDstElt);
  158. // Recursively handle this integer conversion, if possible.
  159. C = FoldBitCast(C, DestIVTy, DL);
  160. // Finally, IR can handle this now that #elts line up.
  161. return ConstantExpr::getBitCast(C, DestTy);
  162. }
  163. // Okay, we know the destination is an integer; if the input is FP, convert
  164. // it to an integer first.
  165. if (SrcEltTy->isFloatingPointTy()) {
  166. unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
  167. auto *SrcIVTy = FixedVectorType::get(
  168. IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
  169. // Ask IR to do the conversion now that #elts line up.
  170. C = ConstantExpr::getBitCast(C, SrcIVTy);
  171. // If IR wasn't able to fold it, bail out.
  172. if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector.
  173. !isa<ConstantDataVector>(C))
  174. return C;
  175. }
  176. // Now we know that the input and output vectors are both integer vectors
  177. // of the same size, and that their #elements is not the same. Do the
  178. // conversion here, which depends on whether the input or output has
  179. // more elements.
  180. bool isLittleEndian = DL.isLittleEndian();
  181. SmallVector<Constant*, 32> Result;
  182. if (NumDstElt < NumSrcElt) {
  183. // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
  184. Constant *Zero = Constant::getNullValue(DstEltTy);
  185. unsigned Ratio = NumSrcElt/NumDstElt;
  186. unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
  187. unsigned SrcElt = 0;
  188. for (unsigned i = 0; i != NumDstElt; ++i) {
  189. // Build each element of the result.
  190. Constant *Elt = Zero;
  191. unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
  192. for (unsigned j = 0; j != Ratio; ++j) {
  193. Constant *Src = C->getAggregateElement(SrcElt++);
  194. if (Src && isa<UndefValue>(Src))
  195. Src = Constant::getNullValue(
  196. cast<VectorType>(C->getType())->getElementType());
  197. else
  198. Src = dyn_cast_or_null<ConstantInt>(Src);
  199. if (!Src) // Reject constantexpr elements.
  200. return ConstantExpr::getBitCast(C, DestTy);
  201. // Zero extend the element to the right size.
  202. Src = ConstantExpr::getZExt(Src, Elt->getType());
  203. // Shift it to the right place, depending on endianness.
  204. Src = ConstantExpr::getShl(Src,
  205. ConstantInt::get(Src->getType(), ShiftAmt));
  206. ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
  207. // Mix it in.
  208. Elt = ConstantExpr::getOr(Elt, Src);
  209. }
  210. Result.push_back(Elt);
  211. }
  212. return ConstantVector::get(Result);
  213. }
  214. // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  215. unsigned Ratio = NumDstElt/NumSrcElt;
  216. unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
  217. // Loop over each source value, expanding into multiple results.
  218. for (unsigned i = 0; i != NumSrcElt; ++i) {
  219. auto *Element = C->getAggregateElement(i);
  220. if (!Element) // Reject constantexpr elements.
  221. return ConstantExpr::getBitCast(C, DestTy);
  222. if (isa<UndefValue>(Element)) {
  223. // Correctly propagate undef values.
  224. Result.append(Ratio, UndefValue::get(DstEltTy));
  225. continue;
  226. }
  227. auto *Src = dyn_cast<ConstantInt>(Element);
  228. if (!Src)
  229. return ConstantExpr::getBitCast(C, DestTy);
  230. unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
  231. for (unsigned j = 0; j != Ratio; ++j) {
  232. // Shift the piece of the value into the right place, depending on
  233. // endianness.
  234. Constant *Elt = ConstantExpr::getLShr(Src,
  235. ConstantInt::get(Src->getType(), ShiftAmt));
  236. ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
  237. // Truncate the element to an integer with the same pointer size and
  238. // convert the element back to a pointer using an inttoptr.
  239. if (DstEltTy->isPointerTy()) {
  240. IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize);
  241. Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy);
  242. Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy));
  243. continue;
  244. }
  245. // Truncate and remember this piece.
  246. Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
  247. }
  248. }
  249. return ConstantVector::get(Result);
  250. }
  251. } // end anonymous namespace
  252. /// If this constant is a constant offset from a global, return the global and
  253. /// the constant. Because of constantexprs, this function is recursive.
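///
/// For example (illustrative), given the constant expression
///   getelementptr (i8, i8* @g, i64 8)
/// this returns true with GV == @g and Offset == 8.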
  254. bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
  255. APInt &Offset, const DataLayout &DL,
  256. DSOLocalEquivalent **DSOEquiv) {
  257. if (DSOEquiv)
  258. *DSOEquiv = nullptr;
  259. // Trivial case, constant is the global.
  260. if ((GV = dyn_cast<GlobalValue>(C))) {
  261. unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
  262. Offset = APInt(BitWidth, 0);
  263. return true;
  264. }
  265. if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
  266. if (DSOEquiv)
  267. *DSOEquiv = FoundDSOEquiv;
  268. GV = FoundDSOEquiv->getGlobalValue();
  269. unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
  270. Offset = APInt(BitWidth, 0);
  271. return true;
  272. }
  273. // Otherwise, if this isn't a constant expr, bail out.
  274. auto *CE = dyn_cast<ConstantExpr>(C);
  275. if (!CE) return false;
  276. // Look through ptr->int and ptr->ptr casts.
  277. if (CE->getOpcode() == Instruction::PtrToInt ||
  278. CE->getOpcode() == Instruction::BitCast)
  279. return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
  280. DSOEquiv);
  281. // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
  282. auto *GEP = dyn_cast<GEPOperator>(CE);
  283. if (!GEP)
  284. return false;
  285. unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
  286. APInt TmpOffset(BitWidth, 0);
  287. // If the base isn't a global+constant, we aren't either.
  288. if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
  289. DSOEquiv))
  290. return false;
  291. // Otherwise, add any offset that our operands provide.
  292. if (!GEP->accumulateConstantOffset(DL, TmpOffset))
  293. return false;
  294. Offset = TmpOffset;
  295. return true;
  296. }
  297. Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
  298. const DataLayout &DL) {
  299. do {
  300. Type *SrcTy = C->getType();
  301. if (SrcTy == DestTy)
  302. return C;
  303. TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
  304. TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
  305. if (!TypeSize::isKnownGE(SrcSize, DestSize))
  306. return nullptr;
  307. // Catch the obvious splat cases (since all-zeros can coerce non-integral
  308. // pointers legally).
  309. if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy))
  310. return Res;
  311. // If the type sizes are the same and a cast is legal, just directly
  312. // cast the constant.
  313. // But be careful not to coerce non-integral pointers illegally.
  314. if (SrcSize == DestSize &&
  315. DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
  316. DL.isNonIntegralPointerType(DestTy->getScalarType())) {
  317. Instruction::CastOps Cast = Instruction::BitCast;
  318. // If we are going from a pointer to int or vice versa, we spell the cast
  319. // differently.
  320. if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
  321. Cast = Instruction::IntToPtr;
  322. else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
  323. Cast = Instruction::PtrToInt;
  324. if (CastInst::castIsValid(Cast, C, DestTy))
  325. return ConstantExpr::getCast(Cast, C, DestTy);
  326. }
  327. // If this isn't an aggregate type, there is nothing we can do to drill down
  328. // and find a bitcastable constant.
  329. if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
  330. return nullptr;
  331. // We're simulating a load through a pointer that was bitcast to point to
  332. // a different type, so we can try to walk down through the initial
  333. // elements of an aggregate to see if some part of the aggregate is
  334. // castable to implement the "load" semantic model.
  335. if (SrcTy->isStructTy()) {
  336. // Struct types might have leading zero-length elements like [0 x i32],
  337. // which are certainly not what we are looking for, so skip them.
  338. unsigned Elem = 0;
  339. Constant *ElemC;
  340. do {
  341. ElemC = C->getAggregateElement(Elem++);
  342. } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
  343. C = ElemC;
  344. } else {
  345. // For non-byte-sized vector elements, the first element is not
  346. // necessarily located at the vector base address.
  347. if (auto *VT = dyn_cast<VectorType>(SrcTy))
  348. if (!DL.typeSizeEqualsStoreSize(VT->getElementType()))
  349. return nullptr;
  350. C = C->getAggregateElement(0u);
  351. }
  352. } while (C);
  353. return nullptr;
  354. }
  355. namespace {
  356. /// Recursive helper to read bits out of a global. C is the constant being
  357. /// copied out of. ByteOffset is an offset into C. CurPtr is the pointer to
  358. /// copy results into, and BytesLeft is the number of bytes left in the
  359. /// CurPtr buffer. DL is the DataLayout.
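///
/// For example (illustrative), reading 4 bytes at ByteOffset 0 from the
/// ConstantInt i32 0x11223344 on a little-endian target fills CurPtr with
/// the bytes 0x44 0x33 0x22 0x11.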
  360. bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
  361. unsigned BytesLeft, const DataLayout &DL) {
  362. assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
  363. "Out of range access");
  364. // If this element is zero or undefined, we can just return since *CurPtr is
  365. // zero initialized.
  366. if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
  367. return true;
  368. if (auto *CI = dyn_cast<ConstantInt>(C)) {
  369. if (CI->getBitWidth() > 64 ||
  370. (CI->getBitWidth() & 7) != 0)
  371. return false;
  372. uint64_t Val = CI->getZExtValue();
  373. unsigned IntBytes = unsigned(CI->getBitWidth()/8);
  374. for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
  375. int n = ByteOffset;
  376. if (!DL.isLittleEndian())
  377. n = IntBytes - n - 1;
  378. CurPtr[i] = (unsigned char)(Val >> (n * 8));
  379. ++ByteOffset;
  380. }
  381. return true;
  382. }
  383. if (auto *CFP = dyn_cast<ConstantFP>(C)) {
  384. if (CFP->getType()->isDoubleTy()) {
  385. C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
  386. return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
  387. }
  388. if (CFP->getType()->isFloatTy()){
  389. C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
  390. return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
  391. }
  392. if (CFP->getType()->isHalfTy()){
  393. C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
  394. return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
  395. }
  396. return false;
  397. }
  398. if (auto *CS = dyn_cast<ConstantStruct>(C)) {
  399. const StructLayout *SL = DL.getStructLayout(CS->getType());
  400. unsigned Index = SL->getElementContainingOffset(ByteOffset);
  401. uint64_t CurEltOffset = SL->getElementOffset(Index);
  402. ByteOffset -= CurEltOffset;
  403. while (true) {
  404. // If the element access is to the element itself and not to tail padding,
  405. // read the bytes from the element.
  406. uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
  407. if (ByteOffset < EltSize &&
  408. !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
  409. BytesLeft, DL))
  410. return false;
  411. ++Index;
  412. // Check to see if we read from the last struct element; if so, we're done.
  413. if (Index == CS->getType()->getNumElements())
  414. return true;
  415. // If we read all of the bytes we needed from this element, we're done.
  416. uint64_t NextEltOffset = SL->getElementOffset(Index);
  417. if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
  418. return true;
  419. // Move to the next element of the struct.
  420. CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
  421. BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
  422. ByteOffset = 0;
  423. CurEltOffset = NextEltOffset;
  424. }
  425. // not reached.
  426. }
  427. if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
  428. isa<ConstantDataSequential>(C)) {
  429. uint64_t NumElts;
  430. Type *EltTy;
  431. if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
  432. NumElts = AT->getNumElements();
  433. EltTy = AT->getElementType();
  434. } else {
  435. NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
  436. EltTy = cast<FixedVectorType>(C->getType())->getElementType();
  437. }
  438. uint64_t EltSize = DL.getTypeAllocSize(EltTy);
  439. uint64_t Index = ByteOffset / EltSize;
  440. uint64_t Offset = ByteOffset - Index * EltSize;
  441. for (; Index != NumElts; ++Index) {
  442. if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
  443. BytesLeft, DL))
  444. return false;
  445. uint64_t BytesWritten = EltSize - Offset;
  446. assert(BytesWritten <= EltSize && "Not indexing into this element?");
  447. if (BytesWritten >= BytesLeft)
  448. return true;
  449. Offset = 0;
  450. BytesLeft -= BytesWritten;
  451. CurPtr += BytesWritten;
  452. }
  453. return true;
  454. }
  455. if (auto *CE = dyn_cast<ConstantExpr>(C)) {
  456. if (CE->getOpcode() == Instruction::IntToPtr &&
  457. CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
  458. return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
  459. BytesLeft, DL);
  460. }
  461. }
  462. // Otherwise, unknown initializer type.
  463. return false;
  464. }
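/// Fold a load of type LoadTy at byte offset Offset from the constant C by
/// reinterpreting C's in-memory bytes (as laid out by DL). Non-integer load
/// types are handled by loading an equally sized integer and bitcasting the
/// result. Returns nullptr if the bytes cannot be read.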
  465. Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
  466. int64_t Offset, const DataLayout &DL) {
  467. // Bail out early. We do not expect to load a scalable vector type from a
  468. // global variable.
  468. if (isa<ScalableVectorType>(LoadTy))
  469. return nullptr;
  470. auto *IntType = dyn_cast<IntegerType>(LoadTy);
  471. // If this isn't an integer load we can't fold it directly.
  472. if (!IntType) {
  473. // If this is a non-integer load, we can try folding it as an int load and
  474. // then bitcast the result. This can be useful for union cases. Note
  475. // that address spaces don't matter here since we're not going to result in
  476. // an actual new load.
  477. if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&
  478. !LoadTy->isVectorTy())
  479. return nullptr;
  480. Type *MapTy = Type::getIntNTy(
  481. C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedSize());
  482. if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
  483. if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
  484. !LoadTy->isX86_AMXTy())
  485. // Materializing a zero can be done trivially without a bitcast
  486. return Constant::getNullValue(LoadTy);
  487. Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
  488. Res = FoldBitCast(Res, CastTy, DL);
  489. if (LoadTy->isPtrOrPtrVectorTy()) {
  490. // For a vector of pointers, we need to first convert to a vector of integers, then do the vector inttoptr.
  491. if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
  492. !LoadTy->isX86_AMXTy())
  493. return Constant::getNullValue(LoadTy);
  494. if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
  495. // Be careful not to replace a load of an addrspace value with an inttoptr here
  496. return nullptr;
  497. Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy);
  498. }
  499. return Res;
  500. }
  501. return nullptr;
  502. }
  503. unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
  504. if (BytesLoaded > 32 || BytesLoaded == 0)
  505. return nullptr;
  506. // If we're not accessing anything in this constant, the result is undefined.
  507. if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
  508. return UndefValue::get(IntType);
  509. // TODO: We should be able to support scalable types.
  510. TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
  511. if (InitializerSize.isScalable())
  512. return nullptr;
  513. // If we're not accessing anything in this constant, the result is undefined.
  514. if (Offset >= (int64_t)InitializerSize.getFixedValue())
  515. return UndefValue::get(IntType);
  516. unsigned char RawBytes[32] = {0};
  517. unsigned char *CurPtr = RawBytes;
  518. unsigned BytesLeft = BytesLoaded;
  519. // If we're loading off the beginning of the global, some bytes may be valid.
  520. if (Offset < 0) {
  521. CurPtr += -Offset;
  522. BytesLeft += Offset;
  523. Offset = 0;
  524. }
  525. if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))
  526. return nullptr;
  527. APInt ResultVal = APInt(IntType->getBitWidth(), 0);
  528. if (DL.isLittleEndian()) {
  529. ResultVal = RawBytes[BytesLoaded - 1];
  530. for (unsigned i = 1; i != BytesLoaded; ++i) {
  531. ResultVal <<= 8;
  532. ResultVal |= RawBytes[BytesLoaded - 1 - i];
  533. }
  534. } else {
  535. ResultVal = RawBytes[0];
  536. for (unsigned i = 1; i != BytesLoaded; ++i) {
  537. ResultVal <<= 8;
  538. ResultVal |= RawBytes[i];
  539. }
  540. }
  541. return ConstantInt::get(IntType->getContext(), ResultVal);
  542. }
  543. /// If this Offset points exactly to the start of an aggregate element, return
  544. /// that element; otherwise return nullptr.
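///
/// For example (illustrative), with Base a [4 x i32] constant array, an
/// Offset of 4 returns element 1, while an Offset of 2 (which lands inside
/// an element) returns nullptr.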
  545. Constant *getConstantAtOffset(Constant *Base, APInt Offset,
  546. const DataLayout &DL) {
  547. if (Offset.isZero())
  548. return Base;
  549. if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))
  550. return nullptr;
  551. Type *ElemTy = Base->getType();
  552. SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
  553. if (!Offset.isZero() || !Indices[0].isZero())
  554. return nullptr;
  555. Constant *C = Base;
  556. for (const APInt &Index : drop_begin(Indices)) {
  557. if (Index.isNegative() || Index.getActiveBits() >= 32)
  558. return nullptr;
  559. C = C->getAggregateElement(Index.getZExtValue());
  560. if (!C)
  561. return nullptr;
  562. }
  563. return C;
  564. }
  565. } // end anonymous namespace
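// A worked example (illustrative): loading an i32 at Offset 4 from the
// initializer [4 x i32] [i32 10, i32 20, i32 30, i32 40] yields i32 20.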
  566. Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
  567. const APInt &Offset,
  568. const DataLayout &DL) {
  569. if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))
  570. if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))
  571. return Result;
  572. // Explicitly check for out-of-bounds access, so we return undef even if the
  573. // constant is a uniform value.
  574. TypeSize Size = DL.getTypeAllocSize(C->getType());
  575. if (!Size.isScalable() && Offset.sge(Size.getFixedSize()))
  576. return UndefValue::get(Ty);
  577. // Try an offset-independent fold of a uniform value.
  578. if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty))
  579. return Result;
  580. // Try hard to fold loads from bitcasted strange and non-type-safe things.
  581. if (Offset.getMinSignedBits() <= 64)
  582. if (Constant *Result =
  583. FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL))
  584. return Result;
  585. return nullptr;
  586. }
  587. Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
  588. const DataLayout &DL) {
  589. return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL);
  590. }
  591. Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
  592. APInt Offset,
  593. const DataLayout &DL) {
  594. C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
  595. DL, Offset, /* AllowNonInbounds */ true));
  596. if (auto *GV = dyn_cast<GlobalVariable>(C))
  597. if (GV->isConstant() && GV->hasDefinitiveInitializer())
  598. if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
  599. Offset, DL))
  600. return Result;
  601. // If this load comes from anywhere in a uniform constant global, the value
  602. // is always the same, regardless of the loaded offset.
  603. if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C))) {
  604. if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
  605. if (Constant *Res =
  606. ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty))
  607. return Res;
  608. }
  609. }
  610. return nullptr;
  611. }
  612. Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
  613. const DataLayout &DL) {
  614. APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);
  615. return ConstantFoldLoadFromConstPtr(C, Ty, Offset, DL);
  616. }
  617. Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty) {
  618. if (isa<PoisonValue>(C))
  619. return PoisonValue::get(Ty);
  620. if (isa<UndefValue>(C))
  621. return UndefValue::get(Ty);
  622. if (C->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy())
  623. return Constant::getNullValue(Ty);
  624. if (C->isAllOnesValue() &&
  625. (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy()))
  626. return Constant::getAllOnesValue(Ty);
  627. return nullptr;
  628. }
  629. namespace {
  630. /// One of Op0/Op1 is a constant expression.
  631. /// Attempt to symbolically evaluate the result of a binary operator merging
  632. /// these together. The DataLayout is always available and is provided
  633. /// as DL.
  634. Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
  635. const DataLayout &DL) {
  636. // SROA
  637. // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
  638. // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
  639. // bits.
  640. if (Opc == Instruction::And) {
  641. KnownBits Known0 = computeKnownBits(Op0, DL);
  642. KnownBits Known1 = computeKnownBits(Op1, DL);
  643. if ((Known1.One | Known0.Zero).isAllOnes()) {
  644. // All the bits of Op0 that the 'and' could be masking are already zero.
  645. return Op0;
  646. }
  647. if ((Known0.One | Known1.Zero).isAllOnes()) {
  648. // All the bits of Op1 that the 'and' could be masking are already zero.
  649. return Op1;
  650. }
  651. Known0 &= Known1;
  652. if (Known0.isConstant())
  653. return ConstantInt::get(Op0->getType(), Known0.getConstant());
  654. }
  655. // If the constant expr is something like &A[123] - &A[4].f, fold this into a
  656. // constant. This happens frequently when iterating over a global array.
  657. if (Opc == Instruction::Sub) {
  658. GlobalValue *GV1, *GV2;
  659. APInt Offs1, Offs2;
  660. if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
  661. if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
  662. unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());
  663. // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
  664. // PtrToInt may change the bitwidth, so we have to convert to the right
  665. // size first.
  666. return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
  667. Offs2.zextOrTrunc(OpSize));
  668. }
  669. }
  670. return nullptr;
  671. }
  672. /// If array indices are not pointer-sized integers, explicitly cast them so
  673. /// that they aren't implicitly cast by the getelementptr.
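///
/// For example (illustrative), an i16 array index is cast (sign-extended) to
/// the pointer-index type from DL, typically i64, so that the getelementptr
/// does not have to cast it implicitly.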
  674. Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
  675. Type *ResultTy, Optional<unsigned> InRangeIndex,
  676. const DataLayout &DL, const TargetLibraryInfo *TLI) {
  677. Type *IntIdxTy = DL.getIndexType(ResultTy);
  678. Type *IntIdxScalarTy = IntIdxTy->getScalarType();
  679. bool Any = false;
  680. SmallVector<Constant*, 32> NewIdxs;
  681. for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
  682. if ((i == 1 ||
  683. !isa<StructType>(GetElementPtrInst::getIndexedType(
  684. SrcElemTy, Ops.slice(1, i - 1)))) &&
  685. Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
  686. Any = true;
  687. Type *NewType = Ops[i]->getType()->isVectorTy()
  688. ? IntIdxTy
  689. : IntIdxScalarTy;
  690. NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
  691. true,
  692. NewType,
  693. true),
  694. Ops[i], NewType));
  695. } else
  696. NewIdxs.push_back(Ops[i]);
  697. }
  698. if (!Any)
  699. return nullptr;
  700. Constant *C = ConstantExpr::getGetElementPtr(
  701. SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex);
  702. return ConstantFoldConstant(C, DL, TLI);
  703. }
  704. /// Strip the pointer casts, but preserve the address space information.
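///
/// For example (illustrative), if Ptr addrspacecasts a global from
/// addrspace(0) into addrspace(1), stripping would otherwise hand back the
/// addrspace(0) global, so this helper re-applies a pointer cast to keep the
/// result in addrspace(1).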
  705. Constant *StripPtrCastKeepAS(Constant *Ptr) {
  706. assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
  707. auto *OldPtrTy = cast<PointerType>(Ptr->getType());
  708. Ptr = cast<Constant>(Ptr->stripPointerCasts());
  709. auto *NewPtrTy = cast<PointerType>(Ptr->getType());
  710. // Preserve the address space number of the pointer.
  711. if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
  712. Ptr = ConstantExpr::getPointerCast(
  713. Ptr, PointerType::getWithSamePointeeType(NewPtrTy,
  714. OldPtrTy->getAddressSpace()));
  715. }
  716. return Ptr;
  717. }
  718. /// If we can symbolically evaluate the GEP constant expression, do so.
  719. Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
  720. ArrayRef<Constant *> Ops,
  721. const DataLayout &DL,
  722. const TargetLibraryInfo *TLI) {
  723. const GEPOperator *InnermostGEP = GEP;
  724. bool InBounds = GEP->isInBounds();
  725. Type *SrcElemTy = GEP->getSourceElementType();
  726. Type *ResElemTy = GEP->getResultElementType();
  727. Type *ResTy = GEP->getType();
  728. if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
  729. return nullptr;
  730. if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
  731. GEP->getInRangeIndex(), DL, TLI))
  732. return C;
  733. Constant *Ptr = Ops[0];
  734. if (!Ptr->getType()->isPointerTy())
  735. return nullptr;
  736. Type *IntIdxTy = DL.getIndexType(Ptr->getType());
  737. // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
  738. // "inttoptr (sub (ptrtoint Ptr), V)"
  739. if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
  740. auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
  741. assert((!CE || CE->getType() == IntIdxTy) &&
  742. "CastGEPIndices didn't canonicalize index types!");
  743. if (CE && CE->getOpcode() == Instruction::Sub &&
  744. CE->getOperand(0)->isNullValue()) {
  745. Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
  746. Res = ConstantExpr::getSub(Res, CE->getOperand(1));
  747. Res = ConstantExpr::getIntToPtr(Res, ResTy);
  748. return ConstantFoldConstant(Res, DL, TLI);
  749. }
  750. }
  751. for (unsigned i = 1, e = Ops.size(); i != e; ++i)
  752. if (!isa<ConstantInt>(Ops[i]))
  753. return nullptr;
  754. unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);
  755. APInt Offset =
  756. APInt(BitWidth,
  757. DL.getIndexedOffsetInType(
  758. SrcElemTy,
  759. makeArrayRef((Value * const *)Ops.data() + 1, Ops.size() - 1)));
  760. Ptr = StripPtrCastKeepAS(Ptr);
  761. // If this is a GEP of a GEP, fold it all into a single GEP.
  762. while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
  763. InnermostGEP = GEP;
  764. InBounds &= GEP->isInBounds();
  765. SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands()));
  766. // Do not try to incorporate the sub-GEP if some index is not a number.
  767. bool AllConstantInt = true;
  768. for (Value *NestedOp : NestedOps)
  769. if (!isa<ConstantInt>(NestedOp)) {
  770. AllConstantInt = false;
  771. break;
  772. }
  773. if (!AllConstantInt)
  774. break;
  775. Ptr = cast<Constant>(GEP->getOperand(0));
  776. SrcElemTy = GEP->getSourceElementType();
  777. Offset += APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps));
  778. Ptr = StripPtrCastKeepAS(Ptr);
  779. }
  780. // If the base value for this address is a literal integer value, fold the
  781. // getelementptr to the resulting integer value cast to the pointer type.
  782. APInt BasePtr(BitWidth, 0);
  783. if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) {
  784. if (CE->getOpcode() == Instruction::IntToPtr) {
  785. if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
  786. BasePtr = Base->getValue().zextOrTrunc(BitWidth);
  787. }
  788. }
  789. auto *PTy = cast<PointerType>(Ptr->getType());
  790. if ((Ptr->isNullValue() || BasePtr != 0) &&
  791. !DL.isNonIntegralPointerType(PTy)) {
  792. Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
  793. return ConstantExpr::getIntToPtr(C, ResTy);
  794. }
  795. // Otherwise form a regular getelementptr. Recompute the indices so that
  796. // we eliminate over-indexing of the notional static type array bounds.
  797. // This makes it easy to determine if the getelementptr is "inbounds".
  798. // Also, this helps GlobalOpt do SROA on GlobalVariables.
  799. // For GEPs of GlobalValues, use the value type even for opaque pointers.
  800. // Otherwise use an i8 GEP.
  801. if (auto *GV = dyn_cast<GlobalValue>(Ptr))
  802. SrcElemTy = GV->getValueType();
  803. else if (!PTy->isOpaque())
  804. SrcElemTy = PTy->getNonOpaquePointerElementType();
  805. else
  806. SrcElemTy = Type::getInt8Ty(Ptr->getContext());
  807. if (!SrcElemTy->isSized())
  808. return nullptr;
  809. Type *ElemTy = SrcElemTy;
  810. SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
  811. if (Offset != 0)
  812. return nullptr;
  813. // Try to add additional zero indices to reach the desired result element
  814. // type.
  815. // TODO: Should we avoid extra zero indices if ResElemTy can't be reached and
  816. // we'll have to insert a bitcast anyway?
  817. while (ElemTy != ResElemTy) {
  818. Type *NextTy = GetElementPtrInst::getTypeAtIndex(ElemTy, (uint64_t)0);
  819. if (!NextTy)
  820. break;
  821. Indices.push_back(APInt::getZero(isa<StructType>(ElemTy) ? 32 : BitWidth));
  822. ElemTy = NextTy;
  823. }
  824. SmallVector<Constant *, 32> NewIdxs;
  825. for (const APInt &Index : Indices)
  826. NewIdxs.push_back(ConstantInt::get(
  827. Type::getIntNTy(Ptr->getContext(), Index.getBitWidth()), Index));
  828. // Preserve the inrange index from the innermost GEP if possible. We must
  829. // have calculated the same indices up to and including the inrange index.
  830. Optional<unsigned> InRangeIndex;
  831. if (Optional<unsigned> LastIRIndex = InnermostGEP->getInRangeIndex())
  832. if (SrcElemTy == InnermostGEP->getSourceElementType() &&
  833. NewIdxs.size() > *LastIRIndex) {
  834. InRangeIndex = LastIRIndex;
  835. for (unsigned I = 0; I <= *LastIRIndex; ++I)
  836. if (NewIdxs[I] != InnermostGEP->getOperand(I + 1))
  837. return nullptr;
  838. }
  839. // Create a GEP.
  840. Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs,
  841. InBounds, InRangeIndex);
  842. assert(
  843. cast<PointerType>(C->getType())->isOpaqueOrPointeeTypeMatches(ElemTy) &&
  844. "Computed GetElementPtr has unexpected type!");
  845. // If we ended up indexing a member with a type that doesn't match
  846. // the type of what the original indices indexed, add a cast.
  847. if (C->getType() != ResTy)
  848. C = FoldBitCast(C, ResTy, DL);
  849. return C;
  850. }
  851. /// Attempt to constant fold an instruction with the
  852. /// specified opcode and operands. If successful, the constant result is
  853. /// returned, if not, null is returned. Note that this function can fail when
  854. /// attempting to fold instructions like loads and stores, which have no
  855. /// constant expression form.
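///
/// For example (illustrative), with Opcode == Instruction::Add and two
/// ConstantInt operands this defers to ConstantFoldBinaryOpOperands and
/// returns their sum as a ConstantInt.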
  856. Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
  857. ArrayRef<Constant *> Ops,
  858. const DataLayout &DL,
  859. const TargetLibraryInfo *TLI) {
  860. Type *DestTy = InstOrCE->getType();
  861. if (Instruction::isUnaryOp(Opcode))
  862. return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);
  863. if (Instruction::isBinaryOp(Opcode))
  864. return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
  865. if (Instruction::isCast(Opcode))
  866. return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);
  867. if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
  868. if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
  869. return C;
  870. return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0],
  871. Ops.slice(1), GEP->isInBounds(),
  872. GEP->getInRangeIndex());
  873. }
  874. if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
  875. return CE->getWithOperands(Ops);
  876. switch (Opcode) {
  877. default: return nullptr;
  878. case Instruction::ICmp:
  879. case Instruction::FCmp: llvm_unreachable("Invalid for compares");
  880. case Instruction::Freeze:
  881. return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
  882. case Instruction::Call:
  883. if (auto *F = dyn_cast<Function>(Ops.back())) {
  884. const auto *Call = cast<CallBase>(InstOrCE);
  885. if (canConstantFoldCallTo(Call, F))
  886. return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI);
  887. }
  888. return nullptr;
  889. case Instruction::Select:
  890. return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
  891. case Instruction::ExtractElement:
  892. return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
  893. case Instruction::ExtractValue:
  894. return ConstantExpr::getExtractValue(
  895. Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
  896. case Instruction::InsertElement:
  897. return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
  898. case Instruction::ShuffleVector:
  899. return ConstantExpr::getShuffleVector(
  900. Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
  901. }
  902. }
  903. } // end anonymous namespace
  904. //===----------------------------------------------------------------------===//
  905. // Constant Folding public APIs
  906. //===----------------------------------------------------------------------===//
  907. namespace {
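/// Recursively fold the operands of a ConstantExpr or ConstantVector and then
/// the constant itself, caching already-folded operands in FoldedOps so that
/// shared subexpressions are only processed once.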
  908. Constant *
  909. ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
  910. const TargetLibraryInfo *TLI,
  911. SmallDenseMap<Constant *, Constant *> &FoldedOps) {
  912. if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
  913. return const_cast<Constant *>(C);
  914. SmallVector<Constant *, 8> Ops;
  915. for (const Use &OldU : C->operands()) {
  916. Constant *OldC = cast<Constant>(&OldU);
  917. Constant *NewC = OldC;
  918. // Recursively fold the ConstantExpr's operands. If we have already folded
  919. // a ConstantExpr, we don't have to process it again.
  920. if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {
  921. auto It = FoldedOps.find(OldC);
  922. if (It == FoldedOps.end()) {
  923. NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);
  924. FoldedOps.insert({OldC, NewC});
  925. } else {
  926. NewC = It->second;
  927. }
  928. }
  929. Ops.push_back(NewC);
  930. }
  931. if (auto *CE = dyn_cast<ConstantExpr>(C)) {
  932. if (CE->isCompare())
  933. return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
  934. DL, TLI);
  935. return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI);
  936. }
  937. assert(isa<ConstantVector>(C));
  938. return ConstantVector::get(Ops);
  939. }
  940. } // end anonymous namespace
  941. Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
  942. const TargetLibraryInfo *TLI) {
  943. // Handle PHI nodes quickly here...
  944. if (auto *PN = dyn_cast<PHINode>(I)) {
  945. Constant *CommonValue = nullptr;
  946. SmallDenseMap<Constant *, Constant *> FoldedOps;
  947. for (Value *Incoming : PN->incoming_values()) {
  948. // If the incoming value is undef then skip it. Note that while we could
  949. // skip the value if it is equal to the phi node itself, we choose not to
  950. // because that would break the rule that constant folding only applies if
  951. // all operands are constants.
  952. if (isa<UndefValue>(Incoming))
  953. continue;
  954. // If the incoming value is not a constant, then give up.
  955. auto *C = dyn_cast<Constant>(Incoming);
  956. if (!C)
  957. return nullptr;
  958. // Fold the PHI's operands.
  959. C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
  960. // If the incoming value is a different constant to
  961. // the one we saw previously, then give up.
  962. if (CommonValue && C != CommonValue)
  963. return nullptr;
  964. CommonValue = C;
  965. }
  966. // If we reach here, all incoming values are the same constant or undef.
  967. return CommonValue ? CommonValue : UndefValue::get(PN->getType());
  968. }
  969. // Scan the operand list, checking to see if they are all constants, if so,
  970. // hand off to ConstantFoldInstOperandsImpl.
  971. if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); }))
  972. return nullptr;
  973. SmallDenseMap<Constant *, Constant *> FoldedOps;
  974. SmallVector<Constant *, 8> Ops;
  975. for (const Use &OpU : I->operands()) {
  976. auto *Op = cast<Constant>(&OpU);
  977. // Fold the Instruction's operands.
  978. Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);
  979. Ops.push_back(Op);
  980. }
  981. if (const auto *CI = dyn_cast<CmpInst>(I))
  982. return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
  983. DL, TLI);
  984. if (const auto *LI = dyn_cast<LoadInst>(I)) {
  985. if (LI->isVolatile())
  986. return nullptr;
  987. return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
  988. }
  989. if (auto *IVI = dyn_cast<InsertValueInst>(I))
  990. return ConstantExpr::getInsertValue(Ops[0], Ops[1], IVI->getIndices());
  991. if (auto *EVI = dyn_cast<ExtractValueInst>(I))
  992. return ConstantExpr::getExtractValue(Ops[0], EVI->getIndices());
  993. return ConstantFoldInstOperands(I, Ops, DL, TLI);
  994. }
  995. Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
  996. const TargetLibraryInfo *TLI) {
  997. SmallDenseMap<Constant *, Constant *> FoldedOps;
  998. return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
  999. }
  1000. Constant *llvm::ConstantFoldInstOperands(Instruction *I,
  1001. ArrayRef<Constant *> Ops,
  1002. const DataLayout &DL,
  1003. const TargetLibraryInfo *TLI) {
  1004. return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
  1005. }
  1006. Constant *llvm::ConstantFoldCompareInstOperands(unsigned IntPredicate,
  1007. Constant *Ops0, Constant *Ops1,
  1008. const DataLayout &DL,
  1009. const TargetLibraryInfo *TLI) {
  1010. CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
  1011. // fold: icmp (inttoptr x), null -> icmp x, 0
  1012. // fold: icmp null, (inttoptr x) -> icmp 0, x
  1013. // fold: icmp (ptrtoint x), 0 -> icmp x, null
  1014. // fold: icmp 0, (ptrtoint x) -> icmp null, x
  1015. // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
  1016. // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
  1017. //
1018. // FIXME: The following comment is out of date and the DataLayout is here now.
  1019. // ConstantExpr::getCompare cannot do this, because it doesn't have DL
  1020. // around to know if bit truncation is happening.
  1021. if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
  1022. if (Ops1->isNullValue()) {
  1023. if (CE0->getOpcode() == Instruction::IntToPtr) {
  1024. Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
  1025. // Convert the integer value to the right size to ensure we get the
  1026. // proper extension or truncation.
  1027. Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
  1028. IntPtrTy, false);
  1029. Constant *Null = Constant::getNullValue(C->getType());
  1030. return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
  1031. }
1032. // Only do this transformation if the int is IntPtrTy in size; otherwise
1033. // there is a truncation or extension that we aren't modeling.
  1034. if (CE0->getOpcode() == Instruction::PtrToInt) {
  1035. Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
  1036. if (CE0->getType() == IntPtrTy) {
  1037. Constant *C = CE0->getOperand(0);
  1038. Constant *Null = Constant::getNullValue(C->getType());
  1039. return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
  1040. }
  1041. }
  1042. }
  1043. if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
  1044. if (CE0->getOpcode() == CE1->getOpcode()) {
  1045. if (CE0->getOpcode() == Instruction::IntToPtr) {
  1046. Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
  1047. // Convert the integer value to the right size to ensure we get the
  1048. // proper extension or truncation.
  1049. Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
  1050. IntPtrTy, false);
  1051. Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
  1052. IntPtrTy, false);
  1053. return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
  1054. }
1055. // Only do this transformation if the int is IntPtrTy in size; otherwise
1056. // there is a truncation or extension that we aren't modeling.
  1057. if (CE0->getOpcode() == Instruction::PtrToInt) {
  1058. Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
  1059. if (CE0->getType() == IntPtrTy &&
  1060. CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
  1061. return ConstantFoldCompareInstOperands(
  1062. Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
  1063. }
  1064. }
  1065. }
  1066. }
  1067. // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
  1068. // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
  1069. if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
  1070. CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
  1071. Constant *LHS = ConstantFoldCompareInstOperands(
  1072. Predicate, CE0->getOperand(0), Ops1, DL, TLI);
  1073. Constant *RHS = ConstantFoldCompareInstOperands(
  1074. Predicate, CE0->getOperand(1), Ops1, DL, TLI);
  1075. unsigned OpC =
  1076. Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
  1077. return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL);
  1078. }
  1079. // Convert pointer comparison (base+offset1) pred (base+offset2) into
  1080. // offset1 pred offset2, for the case where the offset is inbounds. This
  1081. // only works for equality and unsigned comparison, as inbounds permits
  1082. // crossing the sign boundary. However, the offset comparison itself is
  1083. // signed.
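// Illustrative example (hypothetical, schematic IR): for a global @g,
//   icmp ult (gep inbounds @g, 4), (gep inbounds @g, 8)
// strips both operands to the common base @g with offsets 4 and 8, so it
// folds to icmp slt 4, 8, i.e. true.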
  1084. if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) {
  1085. unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType());
  1086. APInt Offset0(IndexWidth, 0);
  1087. Value *Stripped0 =
  1088. Ops0->stripAndAccumulateInBoundsConstantOffsets(DL, Offset0);
  1089. APInt Offset1(IndexWidth, 0);
  1090. Value *Stripped1 =
  1091. Ops1->stripAndAccumulateInBoundsConstantOffsets(DL, Offset1);
  1092. if (Stripped0 == Stripped1)
  1093. return ConstantExpr::getCompare(
  1094. ICmpInst::getSignedPredicate(Predicate),
  1095. ConstantInt::get(CE0->getContext(), Offset0),
  1096. ConstantInt::get(CE0->getContext(), Offset1));
  1097. }
  1098. } else if (isa<ConstantExpr>(Ops1)) {
  1099. // If RHS is a constant expression, but the left side isn't, swap the
  1100. // operands and try again.
  1101. Predicate = ICmpInst::getSwappedPredicate(Predicate);
  1102. return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
  1103. }
  1104. return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
  1105. }
  1106. Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
  1107. const DataLayout &DL) {
  1108. assert(Instruction::isUnaryOp(Opcode));
  1109. return ConstantExpr::get(Opcode, Op);
  1110. }
  1111. Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
  1112. Constant *RHS,
  1113. const DataLayout &DL) {
  1114. assert(Instruction::isBinaryOp(Opcode));
  1115. if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS))
  1116. if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
  1117. return C;
  1118. return ConstantExpr::get(Opcode, LHS, RHS);
  1119. }
  1120. Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
  1121. Type *DestTy, const DataLayout &DL) {
  1122. assert(Instruction::isCast(Opcode));
  1123. switch (Opcode) {
  1124. default:
  1125. llvm_unreachable("Missing case");
  1126. case Instruction::PtrToInt:
  1127. if (auto *CE = dyn_cast<ConstantExpr>(C)) {
  1128. Constant *FoldedValue = nullptr;
1129. // If the input is an inttoptr, eliminate the pair. This requires knowing
  1130. // the width of a pointer, so it can't be done in ConstantExpr::getCast.
  1131. if (CE->getOpcode() == Instruction::IntToPtr) {
  1132. // zext/trunc the inttoptr to pointer size.
  1133. FoldedValue = ConstantExpr::getIntegerCast(
  1134. CE->getOperand(0), DL.getIntPtrType(CE->getType()),
  1135. /*IsSigned=*/false);
  1136. } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
  1137. // If we have GEP, we can perform the following folds:
  1138. // (ptrtoint (gep null, x)) -> x
  1139. // (ptrtoint (gep (gep null, x), y) -> x + y, etc.
  1140. unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
  1141. APInt BaseOffset(BitWidth, 0);
  1142. auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(
  1143. DL, BaseOffset, /*AllowNonInbounds=*/true));
  1144. if (Base->isNullValue()) {
  1145. FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
  1146. }
  1147. }
  1148. if (FoldedValue) {
  1149. // Do a zext or trunc to get to the ptrtoint dest size.
  1150. return ConstantExpr::getIntegerCast(FoldedValue, DestTy,
  1151. /*IsSigned=*/false);
  1152. }
  1153. }
  1154. return ConstantExpr::getCast(Opcode, C, DestTy);
  1155. case Instruction::IntToPtr:
  1156. // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
  1157. // the int size is >= the ptr size and the address spaces are the same.
  1158. // This requires knowing the width of a pointer, so it can't be done in
  1159. // ConstantExpr::getCast.
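// Illustrative example (hypothetical): with 64-bit pointers in a single
// address space, inttoptr (ptrtoint i32* @g to i64) to i8* loses no
// pointer bits, so it is folded to a bitcast of @g below.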
  1160. if (auto *CE = dyn_cast<ConstantExpr>(C)) {
  1161. if (CE->getOpcode() == Instruction::PtrToInt) {
  1162. Constant *SrcPtr = CE->getOperand(0);
  1163. unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
  1164. unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
  1165. if (MidIntSize >= SrcPtrSize) {
  1166. unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
  1167. if (SrcAS == DestTy->getPointerAddressSpace())
  1168. return FoldBitCast(CE->getOperand(0), DestTy, DL);
  1169. }
  1170. }
  1171. }
  1172. return ConstantExpr::getCast(Opcode, C, DestTy);
  1173. case Instruction::Trunc:
  1174. case Instruction::ZExt:
  1175. case Instruction::SExt:
  1176. case Instruction::FPTrunc:
  1177. case Instruction::FPExt:
  1178. case Instruction::UIToFP:
  1179. case Instruction::SIToFP:
  1180. case Instruction::FPToUI:
  1181. case Instruction::FPToSI:
  1182. case Instruction::AddrSpaceCast:
  1183. return ConstantExpr::getCast(Opcode, C, DestTy);
  1184. case Instruction::BitCast:
  1185. return FoldBitCast(C, DestTy, DL);
  1186. }
  1187. }
  1188. //===----------------------------------------------------------------------===//
  1189. // Constant Folding for Calls
  1190. //
  1191. bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
  1192. if (Call->isNoBuiltin())
  1193. return false;
  1194. switch (F->getIntrinsicID()) {
1195. // Operations that do not operate on floating-point numbers and do not
1196. // depend on the FP environment can be folded even in strictfp functions.
  1197. case Intrinsic::bswap:
  1198. case Intrinsic::ctpop:
  1199. case Intrinsic::ctlz:
  1200. case Intrinsic::cttz:
  1201. case Intrinsic::fshl:
  1202. case Intrinsic::fshr:
  1203. case Intrinsic::launder_invariant_group:
  1204. case Intrinsic::strip_invariant_group:
  1205. case Intrinsic::masked_load:
  1206. case Intrinsic::get_active_lane_mask:
  1207. case Intrinsic::abs:
  1208. case Intrinsic::smax:
  1209. case Intrinsic::smin:
  1210. case Intrinsic::umax:
  1211. case Intrinsic::umin:
  1212. case Intrinsic::sadd_with_overflow:
  1213. case Intrinsic::uadd_with_overflow:
  1214. case Intrinsic::ssub_with_overflow:
  1215. case Intrinsic::usub_with_overflow:
  1216. case Intrinsic::smul_with_overflow:
  1217. case Intrinsic::umul_with_overflow:
  1218. case Intrinsic::sadd_sat:
  1219. case Intrinsic::uadd_sat:
  1220. case Intrinsic::ssub_sat:
  1221. case Intrinsic::usub_sat:
  1222. case Intrinsic::smul_fix:
  1223. case Intrinsic::smul_fix_sat:
  1224. case Intrinsic::bitreverse:
  1225. case Intrinsic::is_constant:
  1226. case Intrinsic::vector_reduce_add:
  1227. case Intrinsic::vector_reduce_mul:
  1228. case Intrinsic::vector_reduce_and:
  1229. case Intrinsic::vector_reduce_or:
  1230. case Intrinsic::vector_reduce_xor:
  1231. case Intrinsic::vector_reduce_smin:
  1232. case Intrinsic::vector_reduce_smax:
  1233. case Intrinsic::vector_reduce_umin:
  1234. case Intrinsic::vector_reduce_umax:
  1235. // Target intrinsics
  1236. case Intrinsic::amdgcn_perm:
  1237. case Intrinsic::arm_mve_vctp8:
  1238. case Intrinsic::arm_mve_vctp16:
  1239. case Intrinsic::arm_mve_vctp32:
  1240. case Intrinsic::arm_mve_vctp64:
  1241. case Intrinsic::aarch64_sve_convert_from_svbool:
  1242. // WebAssembly float semantics are always known
  1243. case Intrinsic::wasm_trunc_signed:
  1244. case Intrinsic::wasm_trunc_unsigned:
  1245. return true;
1246. // Floating-point operations cannot be folded in strictfp functions in the
1247. // general case. They can be folded if the FP environment is known to the compiler.
  1248. case Intrinsic::minnum:
  1249. case Intrinsic::maxnum:
  1250. case Intrinsic::minimum:
  1251. case Intrinsic::maximum:
  1252. case Intrinsic::log:
  1253. case Intrinsic::log2:
  1254. case Intrinsic::log10:
  1255. case Intrinsic::exp:
  1256. case Intrinsic::exp2:
  1257. case Intrinsic::sqrt:
  1258. case Intrinsic::sin:
  1259. case Intrinsic::cos:
  1260. case Intrinsic::pow:
  1261. case Intrinsic::powi:
  1262. case Intrinsic::fma:
  1263. case Intrinsic::fmuladd:
  1264. case Intrinsic::fptoui_sat:
  1265. case Intrinsic::fptosi_sat:
  1266. case Intrinsic::convert_from_fp16:
  1267. case Intrinsic::convert_to_fp16:
  1268. case Intrinsic::amdgcn_cos:
  1269. case Intrinsic::amdgcn_cubeid:
  1270. case Intrinsic::amdgcn_cubema:
  1271. case Intrinsic::amdgcn_cubesc:
  1272. case Intrinsic::amdgcn_cubetc:
  1273. case Intrinsic::amdgcn_fmul_legacy:
  1274. case Intrinsic::amdgcn_fma_legacy:
  1275. case Intrinsic::amdgcn_fract:
  1276. case Intrinsic::amdgcn_ldexp:
  1277. case Intrinsic::amdgcn_sin:
1278. // The intrinsics below depend on the rounding mode in MXCSR.
  1279. case Intrinsic::x86_sse_cvtss2si:
  1280. case Intrinsic::x86_sse_cvtss2si64:
  1281. case Intrinsic::x86_sse_cvttss2si:
  1282. case Intrinsic::x86_sse_cvttss2si64:
  1283. case Intrinsic::x86_sse2_cvtsd2si:
  1284. case Intrinsic::x86_sse2_cvtsd2si64:
  1285. case Intrinsic::x86_sse2_cvttsd2si:
  1286. case Intrinsic::x86_sse2_cvttsd2si64:
  1287. case Intrinsic::x86_avx512_vcvtss2si32:
  1288. case Intrinsic::x86_avx512_vcvtss2si64:
  1289. case Intrinsic::x86_avx512_cvttss2si:
  1290. case Intrinsic::x86_avx512_cvttss2si64:
  1291. case Intrinsic::x86_avx512_vcvtsd2si32:
  1292. case Intrinsic::x86_avx512_vcvtsd2si64:
  1293. case Intrinsic::x86_avx512_cvttsd2si:
  1294. case Intrinsic::x86_avx512_cvttsd2si64:
  1295. case Intrinsic::x86_avx512_vcvtss2usi32:
  1296. case Intrinsic::x86_avx512_vcvtss2usi64:
  1297. case Intrinsic::x86_avx512_cvttss2usi:
  1298. case Intrinsic::x86_avx512_cvttss2usi64:
  1299. case Intrinsic::x86_avx512_vcvtsd2usi32:
  1300. case Intrinsic::x86_avx512_vcvtsd2usi64:
  1301. case Intrinsic::x86_avx512_cvttsd2usi:
  1302. case Intrinsic::x86_avx512_cvttsd2usi64:
  1303. return !Call->isStrictFP();
1304. // Sign operations are actually bitwise operations; they do not raise
1305. // exceptions even for SNaNs.
  1306. case Intrinsic::fabs:
  1307. case Intrinsic::copysign:
1308. // Non-constrained variants of rounding operations mean the default FP
1309. // environment; they can be folded in any case.
  1310. case Intrinsic::ceil:
  1311. case Intrinsic::floor:
  1312. case Intrinsic::round:
  1313. case Intrinsic::roundeven:
  1314. case Intrinsic::trunc:
  1315. case Intrinsic::nearbyint:
  1316. case Intrinsic::rint:
1317. // Constrained intrinsics can be folded if the FP environment is known
1318. // to the compiler.
  1319. case Intrinsic::experimental_constrained_fma:
  1320. case Intrinsic::experimental_constrained_fmuladd:
  1321. case Intrinsic::experimental_constrained_fadd:
  1322. case Intrinsic::experimental_constrained_fsub:
  1323. case Intrinsic::experimental_constrained_fmul:
  1324. case Intrinsic::experimental_constrained_fdiv:
  1325. case Intrinsic::experimental_constrained_frem:
  1326. case Intrinsic::experimental_constrained_ceil:
  1327. case Intrinsic::experimental_constrained_floor:
  1328. case Intrinsic::experimental_constrained_round:
  1329. case Intrinsic::experimental_constrained_roundeven:
  1330. case Intrinsic::experimental_constrained_trunc:
  1331. case Intrinsic::experimental_constrained_nearbyint:
  1332. case Intrinsic::experimental_constrained_rint:
  1333. return true;
  1334. default:
  1335. return false;
  1336. case Intrinsic::not_intrinsic: break;
  1337. }
  1338. if (!F->hasName() || Call->isStrictFP())
  1339. return false;
1340. // In these cases, checking the length is required. We don't want to
1341. // return true for a name like "cos\0blah", which strcmp would consider
1342. // equal to "cos" but which has length 8.
  1343. StringRef Name = F->getName();
  1344. switch (Name[0]) {
  1345. default:
  1346. return false;
  1347. case 'a':
  1348. return Name == "acos" || Name == "acosf" ||
  1349. Name == "asin" || Name == "asinf" ||
  1350. Name == "atan" || Name == "atanf" ||
  1351. Name == "atan2" || Name == "atan2f";
  1352. case 'c':
  1353. return Name == "ceil" || Name == "ceilf" ||
  1354. Name == "cos" || Name == "cosf" ||
  1355. Name == "cosh" || Name == "coshf";
  1356. case 'e':
  1357. return Name == "exp" || Name == "expf" ||
  1358. Name == "exp2" || Name == "exp2f";
  1359. case 'f':
  1360. return Name == "fabs" || Name == "fabsf" ||
  1361. Name == "floor" || Name == "floorf" ||
  1362. Name == "fmod" || Name == "fmodf";
  1363. case 'l':
  1364. return Name == "log" || Name == "logf" ||
  1365. Name == "log2" || Name == "log2f" ||
  1366. Name == "log10" || Name == "log10f";
  1367. case 'n':
  1368. return Name == "nearbyint" || Name == "nearbyintf";
  1369. case 'p':
  1370. return Name == "pow" || Name == "powf";
  1371. case 'r':
  1372. return Name == "remainder" || Name == "remainderf" ||
  1373. Name == "rint" || Name == "rintf" ||
  1374. Name == "round" || Name == "roundf";
  1375. case 's':
  1376. return Name == "sin" || Name == "sinf" ||
  1377. Name == "sinh" || Name == "sinhf" ||
  1378. Name == "sqrt" || Name == "sqrtf";
  1379. case 't':
  1380. return Name == "tan" || Name == "tanf" ||
  1381. Name == "tanh" || Name == "tanhf" ||
  1382. Name == "trunc" || Name == "truncf";
  1383. case '_':
  1384. // Check for various function names that get used for the math functions
  1385. // when the header files are preprocessed with the macro
  1386. // __FINITE_MATH_ONLY__ enabled.
  1387. // The '12' here is the length of the shortest name that can match.
  1388. // We need to check the size before looking at Name[1] and Name[2]
  1389. // so we may as well check a limit that will eliminate mismatches.
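// (For example, "__exp_finite" and "__pow_finite" are exactly 12
// characters long.)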
  1390. if (Name.size() < 12 || Name[1] != '_')
  1391. return false;
  1392. switch (Name[2]) {
  1393. default:
  1394. return false;
  1395. case 'a':
  1396. return Name == "__acos_finite" || Name == "__acosf_finite" ||
  1397. Name == "__asin_finite" || Name == "__asinf_finite" ||
  1398. Name == "__atan2_finite" || Name == "__atan2f_finite";
  1399. case 'c':
  1400. return Name == "__cosh_finite" || Name == "__coshf_finite";
  1401. case 'e':
  1402. return Name == "__exp_finite" || Name == "__expf_finite" ||
  1403. Name == "__exp2_finite" || Name == "__exp2f_finite";
  1404. case 'l':
  1405. return Name == "__log_finite" || Name == "__logf_finite" ||
  1406. Name == "__log10_finite" || Name == "__log10f_finite";
  1407. case 'p':
  1408. return Name == "__pow_finite" || Name == "__powf_finite";
  1409. case 's':
  1410. return Name == "__sinh_finite" || Name == "__sinhf_finite";
  1411. }
  1412. }
  1413. }
  1414. namespace {
  1415. Constant *GetConstantFoldFPValue(double V, Type *Ty) {
  1416. if (Ty->isHalfTy() || Ty->isFloatTy()) {
  1417. APFloat APF(V);
  1418. bool unused;
  1419. APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
  1420. return ConstantFP::get(Ty->getContext(), APF);
  1421. }
  1422. if (Ty->isDoubleTy())
  1423. return ConstantFP::get(Ty->getContext(), APFloat(V));
  1424. llvm_unreachable("Can only constant fold half/float/double");
  1425. }
  1426. /// Clear the floating-point exception state.
  1427. inline void llvm_fenv_clearexcept() {
  1428. #if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
  1429. feclearexcept(FE_ALL_EXCEPT);
  1430. #endif
  1431. errno = 0;
  1432. }
  1433. /// Test if a floating-point exception was raised.
  1434. inline bool llvm_fenv_testexcept() {
  1435. int errno_val = errno;
  1436. if (errno_val == ERANGE || errno_val == EDOM)
  1437. return true;
  1438. #if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
  1439. if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
  1440. return true;
  1441. #endif
  1442. return false;
  1443. }
  1444. Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
  1445. Type *Ty) {
  1446. llvm_fenv_clearexcept();
  1447. double Result = NativeFP(V.convertToDouble());
  1448. if (llvm_fenv_testexcept()) {
  1449. llvm_fenv_clearexcept();
  1450. return nullptr;
  1451. }
  1452. return GetConstantFoldFPValue(Result, Ty);
  1453. }
  1454. Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
  1455. const APFloat &V, const APFloat &W, Type *Ty) {
  1456. llvm_fenv_clearexcept();
  1457. double Result = NativeFP(V.convertToDouble(), W.convertToDouble());
  1458. if (llvm_fenv_testexcept()) {
  1459. llvm_fenv_clearexcept();
  1460. return nullptr;
  1461. }
  1462. return GetConstantFoldFPValue(Result, Ty);
  1463. }
  1464. Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
  1465. FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType());
  1466. if (!VT)
  1467. return nullptr;
  1468. // This isn't strictly necessary, but handle the special/common case of zero:
  1469. // all integer reductions of a zero input produce zero.
  1470. if (isa<ConstantAggregateZero>(Op))
  1471. return ConstantInt::get(VT->getElementType(), 0);
  1472. // This is the same as the underlying binops - poison propagates.
  1473. if (isa<PoisonValue>(Op) || Op->containsPoisonElement())
  1474. return PoisonValue::get(VT->getElementType());
  1475. // TODO: Handle undef.
  1476. if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op))
  1477. return nullptr;
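// Accumulate the reduction element by element. Illustrative example
// (hypothetical): vector_reduce_add on <4 x i32> <1, 2, 3, 4> folds to
// i32 10.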
  1478. auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
  1479. if (!EltC)
  1480. return nullptr;
  1481. APInt Acc = EltC->getValue();
  1482. for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) {
  1483. if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
  1484. return nullptr;
  1485. const APInt &X = EltC->getValue();
  1486. switch (IID) {
  1487. case Intrinsic::vector_reduce_add:
  1488. Acc = Acc + X;
  1489. break;
  1490. case Intrinsic::vector_reduce_mul:
  1491. Acc = Acc * X;
  1492. break;
  1493. case Intrinsic::vector_reduce_and:
  1494. Acc = Acc & X;
  1495. break;
  1496. case Intrinsic::vector_reduce_or:
  1497. Acc = Acc | X;
  1498. break;
  1499. case Intrinsic::vector_reduce_xor:
  1500. Acc = Acc ^ X;
  1501. break;
  1502. case Intrinsic::vector_reduce_smin:
  1503. Acc = APIntOps::smin(Acc, X);
  1504. break;
  1505. case Intrinsic::vector_reduce_smax:
  1506. Acc = APIntOps::smax(Acc, X);
  1507. break;
  1508. case Intrinsic::vector_reduce_umin:
  1509. Acc = APIntOps::umin(Acc, X);
  1510. break;
  1511. case Intrinsic::vector_reduce_umax:
  1512. Acc = APIntOps::umax(Acc, X);
  1513. break;
  1514. }
  1515. }
  1516. return ConstantInt::get(Op->getContext(), Acc);
  1517. }
  1518. /// Attempt to fold an SSE floating point to integer conversion of a constant
  1519. /// floating point. If roundTowardZero is false, the default IEEE rounding is
  1520. /// used (toward nearest, ties to even). This matches the behavior of the
  1521. /// non-truncating SSE instructions in the default rounding mode. The desired
  1522. /// integer type Ty is used to select how many bits are available for the
  1523. /// result. Returns null if the conversion cannot be performed, otherwise
  1524. /// returns the Constant value resulting from the conversion.
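/// Illustrative example (hypothetical): converting 3.75 with
/// roundTowardZero=true yields 3, since the truncating forms accept an
/// inexact result; with roundTowardZero=false the same input is not
/// folded, because only exact conversions are accepted in that mode.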
  1525. Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
  1526. Type *Ty, bool IsSigned) {
1527. // All of these conversion intrinsics form an integer of at most 64 bits.
  1528. unsigned ResultWidth = Ty->getIntegerBitWidth();
  1529. assert(ResultWidth <= 64 &&
  1530. "Can only constant fold conversions to 64 and 32 bit ints");
  1531. uint64_t UIntVal;
  1532. bool isExact = false;
  1533. APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
  1534. : APFloat::rmNearestTiesToEven;
  1535. APFloat::opStatus status =
  1536. Val.convertToInteger(makeMutableArrayRef(UIntVal), ResultWidth,
  1537. IsSigned, mode, &isExact);
  1538. if (status != APFloat::opOK &&
  1539. (!roundTowardZero || status != APFloat::opInexact))
  1540. return nullptr;
  1541. return ConstantInt::get(Ty, UIntVal, IsSigned);
  1542. }
  1543. double getValueAsDouble(ConstantFP *Op) {
  1544. Type *Ty = Op->getType();
  1545. if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
  1546. return Op->getValueAPF().convertToDouble();
  1547. bool unused;
  1548. APFloat APF = Op->getValueAPF();
  1549. APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);
  1550. return APF.convertToDouble();
  1551. }
  1552. static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
  1553. if (auto *CI = dyn_cast<ConstantInt>(Op)) {
  1554. C = &CI->getValue();
  1555. return true;
  1556. }
  1557. if (isa<UndefValue>(Op)) {
  1558. C = nullptr;
  1559. return true;
  1560. }
  1561. return false;
  1562. }
  1563. /// Checks if the given intrinsic call, which evaluates to constant, is allowed
  1564. /// to be folded.
  1565. ///
  1566. /// \param CI Constrained intrinsic call.
  1567. /// \param St Exception flags raised during constant evaluation.
  1568. static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
  1569. APFloat::opStatus St) {
  1570. Optional<RoundingMode> ORM = CI->getRoundingMode();
  1571. Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
  1572. // If the operation does not change exception status flags, it is safe
  1573. // to fold.
  1574. if (St == APFloat::opStatus::opOK)
  1575. return true;
1576. // If evaluation raised an FP exception, the result can depend on the
1577. // rounding mode. If the latter is unknown, folding is not possible.
  1578. if (!ORM || *ORM == RoundingMode::Dynamic)
  1579. return false;
1580. // If FP exceptions are ignored, fold the call, even if such an exception
1581. // is raised.
  1582. if (!EB || *EB != fp::ExceptionBehavior::ebStrict)
  1583. return true;
1584. // Leave the calculation for runtime so that exception flags are correctly
1585. // set in hardware.
  1586. return false;
  1587. }
  1588. /// Returns the rounding mode that should be used for constant evaluation.
  1589. static RoundingMode
  1590. getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
  1591. Optional<RoundingMode> ORM = CI->getRoundingMode();
  1592. if (!ORM || *ORM == RoundingMode::Dynamic)
1593. // Even if the rounding mode is unknown, try evaluating the operation.
1594. // If it does not raise the inexact exception, rounding was not applied,
1595. // so the result is exact and does not depend on the rounding mode.
1596. // Whether other FP exceptions are raised does not depend on the rounding mode.
  1597. return RoundingMode::NearestTiesToEven;
  1598. return *ORM;
  1599. }
  1600. static Constant *ConstantFoldScalarCall1(StringRef Name,
  1601. Intrinsic::ID IntrinsicID,
  1602. Type *Ty,
  1603. ArrayRef<Constant *> Operands,
  1604. const TargetLibraryInfo *TLI,
  1605. const CallBase *Call) {
  1606. assert(Operands.size() == 1 && "Wrong number of operands.");
  1607. if (IntrinsicID == Intrinsic::is_constant) {
  1608. // We know we have a "Constant" argument. But we want to only
  1609. // return true for manifest constants, not those that depend on
  1610. // constants with unknowable values, e.g. GlobalValue or BlockAddress.
  1611. if (Operands[0]->isManifestConstant())
  1612. return ConstantInt::getTrue(Ty->getContext());
  1613. return nullptr;
  1614. }
  1615. if (isa<UndefValue>(Operands[0])) {
  1616. // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
  1617. // ctpop() is between 0 and bitwidth, pick 0 for undef.
  1618. // fptoui.sat and fptosi.sat can always fold to zero (for a zero input).
  1619. if (IntrinsicID == Intrinsic::cos ||
  1620. IntrinsicID == Intrinsic::ctpop ||
  1621. IntrinsicID == Intrinsic::fptoui_sat ||
  1622. IntrinsicID == Intrinsic::fptosi_sat)
  1623. return Constant::getNullValue(Ty);
  1624. if (IntrinsicID == Intrinsic::bswap ||
  1625. IntrinsicID == Intrinsic::bitreverse ||
  1626. IntrinsicID == Intrinsic::launder_invariant_group ||
  1627. IntrinsicID == Intrinsic::strip_invariant_group)
  1628. return Operands[0];
  1629. }
  1630. if (isa<ConstantPointerNull>(Operands[0])) {
  1631. // launder(null) == null == strip(null) iff in addrspace 0
  1632. if (IntrinsicID == Intrinsic::launder_invariant_group ||
  1633. IntrinsicID == Intrinsic::strip_invariant_group) {
1634. // If the instruction is not yet in a basic block (e.g. when cloning
1635. // a function during inlining), Call's caller may not be available,
1636. // so check Call's parent BB before querying Call->getCaller().
  1637. const Function *Caller =
  1638. Call->getParent() ? Call->getCaller() : nullptr;
  1639. if (Caller &&
  1640. !NullPointerIsDefined(
  1641. Caller, Operands[0]->getType()->getPointerAddressSpace())) {
  1642. return Operands[0];
  1643. }
  1644. return nullptr;
  1645. }
  1646. }
  1647. if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
  1648. if (IntrinsicID == Intrinsic::convert_to_fp16) {
  1649. APFloat Val(Op->getValueAPF());
  1650. bool lost = false;
  1651. Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
  1652. return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
  1653. }
  1654. APFloat U = Op->getValueAPF();
  1655. if (IntrinsicID == Intrinsic::wasm_trunc_signed ||
  1656. IntrinsicID == Intrinsic::wasm_trunc_unsigned) {
  1657. bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed;
  1658. if (U.isNaN())
  1659. return nullptr;
  1660. unsigned Width = Ty->getIntegerBitWidth();
  1661. APSInt Int(Width, !Signed);
  1662. bool IsExact = false;
  1663. APFloat::opStatus Status =
  1664. U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
  1665. if (Status == APFloat::opOK || Status == APFloat::opInexact)
  1666. return ConstantInt::get(Ty, Int);
  1667. return nullptr;
  1668. }
  1669. if (IntrinsicID == Intrinsic::fptoui_sat ||
  1670. IntrinsicID == Intrinsic::fptosi_sat) {
  1671. // convertToInteger() already has the desired saturation semantics.
  1672. APSInt Int(Ty->getIntegerBitWidth(),
  1673. IntrinsicID == Intrinsic::fptoui_sat);
  1674. bool IsExact;
  1675. U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
  1676. return ConstantInt::get(Ty, Int);
  1677. }
  1678. if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
  1679. return nullptr;
  1680. // Use internal versions of these intrinsics.
  1681. if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
  1682. U.roundToIntegral(APFloat::rmNearestTiesToEven);
  1683. return ConstantFP::get(Ty->getContext(), U);
  1684. }
  1685. if (IntrinsicID == Intrinsic::round) {
  1686. U.roundToIntegral(APFloat::rmNearestTiesToAway);
  1687. return ConstantFP::get(Ty->getContext(), U);
  1688. }
  1689. if (IntrinsicID == Intrinsic::roundeven) {
  1690. U.roundToIntegral(APFloat::rmNearestTiesToEven);
  1691. return ConstantFP::get(Ty->getContext(), U);
  1692. }
  1693. if (IntrinsicID == Intrinsic::ceil) {
  1694. U.roundToIntegral(APFloat::rmTowardPositive);
  1695. return ConstantFP::get(Ty->getContext(), U);
  1696. }
  1697. if (IntrinsicID == Intrinsic::floor) {
  1698. U.roundToIntegral(APFloat::rmTowardNegative);
  1699. return ConstantFP::get(Ty->getContext(), U);
  1700. }
  1701. if (IntrinsicID == Intrinsic::trunc) {
  1702. U.roundToIntegral(APFloat::rmTowardZero);
  1703. return ConstantFP::get(Ty->getContext(), U);
  1704. }
  1705. if (IntrinsicID == Intrinsic::fabs) {
  1706. U.clearSign();
  1707. return ConstantFP::get(Ty->getContext(), U);
  1708. }
  1709. if (IntrinsicID == Intrinsic::amdgcn_fract) {
  1710. // The v_fract instruction behaves like the OpenCL spec, which defines
  1711. // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is
  1712. // there to prevent fract(-small) from returning 1.0. It returns the
  1713. // largest positive floating-point number less than 1.0."
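// Illustrative example (hypothetical, float input): for x = -1.0e-8f,
// x - floor(x) rounds to 1.0f, so minimum() below clamps the result to
// 0x1.fffffep-1f, the largest float less than 1.0.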
  1714. APFloat FloorU(U);
  1715. FloorU.roundToIntegral(APFloat::rmTowardNegative);
  1716. APFloat FractU(U - FloorU);
  1717. APFloat AlmostOne(U.getSemantics(), 1);
  1718. AlmostOne.next(/*nextDown*/ true);
  1719. return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));
  1720. }
  1721. // Rounding operations (floor, trunc, ceil, round and nearbyint) do not
  1722. // raise FP exceptions, unless the argument is signaling NaN.
  1723. Optional<APFloat::roundingMode> RM;
  1724. switch (IntrinsicID) {
  1725. default:
  1726. break;
  1727. case Intrinsic::experimental_constrained_nearbyint:
  1728. case Intrinsic::experimental_constrained_rint: {
  1729. auto CI = cast<ConstrainedFPIntrinsic>(Call);
  1730. RM = CI->getRoundingMode();
  1731. if (!RM || RM.getValue() == RoundingMode::Dynamic)
  1732. return nullptr;
  1733. break;
  1734. }
  1735. case Intrinsic::experimental_constrained_round:
  1736. RM = APFloat::rmNearestTiesToAway;
  1737. break;
  1738. case Intrinsic::experimental_constrained_ceil:
  1739. RM = APFloat::rmTowardPositive;
  1740. break;
  1741. case Intrinsic::experimental_constrained_floor:
  1742. RM = APFloat::rmTowardNegative;
  1743. break;
  1744. case Intrinsic::experimental_constrained_trunc:
  1745. RM = APFloat::rmTowardZero;
  1746. break;
  1747. }
  1748. if (RM) {
  1749. auto CI = cast<ConstrainedFPIntrinsic>(Call);
  1750. if (U.isFinite()) {
  1751. APFloat::opStatus St = U.roundToIntegral(*RM);
  1752. if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
  1753. St == APFloat::opInexact) {
  1754. Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
  1755. if (EB && *EB == fp::ebStrict)
  1756. return nullptr;
  1757. }
  1758. } else if (U.isSignaling()) {
  1759. Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
  1760. if (EB && *EB != fp::ebIgnore)
  1761. return nullptr;
  1762. U = APFloat::getQNaN(U.getSemantics());
  1763. }
  1764. return ConstantFP::get(Ty->getContext(), U);
  1765. }
  1766. /// We only fold functions with finite arguments. Folding NaN and inf is
  1767. /// likely to be aborted with an exception anyway, and some host libms
  1768. /// have known errors raising exceptions.
  1769. if (!U.isFinite())
  1770. return nullptr;
1771. /// Currently APFloat versions of these functions do not exist, so we use
1772. /// the host native double versions. Float versions are not called
1773. /// directly but for all of these it holds that (float)(f((double)arg)) ==
1774. /// f(arg). Long double is not supported yet.
  1775. const APFloat &APF = Op->getValueAPF();
  1776. switch (IntrinsicID) {
  1777. default: break;
  1778. case Intrinsic::log:
  1779. return ConstantFoldFP(log, APF, Ty);
  1780. case Intrinsic::log2:
  1781. // TODO: What about hosts that lack a C99 library?
  1782. return ConstantFoldFP(Log2, APF, Ty);
  1783. case Intrinsic::log10:
  1784. // TODO: What about hosts that lack a C99 library?
  1785. return ConstantFoldFP(log10, APF, Ty);
  1786. case Intrinsic::exp:
  1787. return ConstantFoldFP(exp, APF, Ty);
  1788. case Intrinsic::exp2:
  1789. // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
  1790. return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
  1791. case Intrinsic::sin:
  1792. return ConstantFoldFP(sin, APF, Ty);
  1793. case Intrinsic::cos:
  1794. return ConstantFoldFP(cos, APF, Ty);
  1795. case Intrinsic::sqrt:
  1796. return ConstantFoldFP(sqrt, APF, Ty);
  1797. case Intrinsic::amdgcn_cos:
  1798. case Intrinsic::amdgcn_sin: {
  1799. double V = getValueAsDouble(Op);
  1800. if (V < -256.0 || V > 256.0)
  1801. // The gfx8 and gfx9 architectures handle arguments outside the range
  1802. // [-256, 256] differently. This should be a rare case so bail out
  1803. // rather than trying to handle the difference.
  1804. return nullptr;
  1805. bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos;
  1806. double V4 = V * 4.0;
  1807. if (V4 == floor(V4)) {
  1808. // Force exact results for quarter-integer inputs.
  1809. const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 };
  1810. V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];
  1811. } else {
  1812. if (IsCos)
  1813. V = cos(V * 2.0 * numbers::pi);
  1814. else
  1815. V = sin(V * 2.0 * numbers::pi);
  1816. }
  1817. return GetConstantFoldFPValue(V, Ty);
  1818. }
  1819. }
  1820. if (!TLI)
  1821. return nullptr;
  1822. LibFunc Func = NotLibFunc;
  1823. if (!TLI->getLibFunc(Name, Func))
  1824. return nullptr;
  1825. switch (Func) {
  1826. default:
  1827. break;
  1828. case LibFunc_acos:
  1829. case LibFunc_acosf:
  1830. case LibFunc_acos_finite:
  1831. case LibFunc_acosf_finite:
  1832. if (TLI->has(Func))
  1833. return ConstantFoldFP(acos, APF, Ty);
  1834. break;
  1835. case LibFunc_asin:
  1836. case LibFunc_asinf:
  1837. case LibFunc_asin_finite:
  1838. case LibFunc_asinf_finite:
  1839. if (TLI->has(Func))
  1840. return ConstantFoldFP(asin, APF, Ty);
  1841. break;
  1842. case LibFunc_atan:
  1843. case LibFunc_atanf:
  1844. if (TLI->has(Func))
  1845. return ConstantFoldFP(atan, APF, Ty);
  1846. break;
  1847. case LibFunc_ceil:
  1848. case LibFunc_ceilf:
  1849. if (TLI->has(Func)) {
  1850. U.roundToIntegral(APFloat::rmTowardPositive);
  1851. return ConstantFP::get(Ty->getContext(), U);
  1852. }
  1853. break;
  1854. case LibFunc_cos:
  1855. case LibFunc_cosf:
  1856. if (TLI->has(Func))
  1857. return ConstantFoldFP(cos, APF, Ty);
  1858. break;
  1859. case LibFunc_cosh:
  1860. case LibFunc_coshf:
  1861. case LibFunc_cosh_finite:
  1862. case LibFunc_coshf_finite:
  1863. if (TLI->has(Func))
  1864. return ConstantFoldFP(cosh, APF, Ty);
  1865. break;
  1866. case LibFunc_exp:
  1867. case LibFunc_expf:
  1868. case LibFunc_exp_finite:
  1869. case LibFunc_expf_finite:
  1870. if (TLI->has(Func))
  1871. return ConstantFoldFP(exp, APF, Ty);
  1872. break;
  1873. case LibFunc_exp2:
  1874. case LibFunc_exp2f:
  1875. case LibFunc_exp2_finite:
  1876. case LibFunc_exp2f_finite:
  1877. if (TLI->has(Func))
  1878. // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
  1879. return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
  1880. break;
  1881. case LibFunc_fabs:
  1882. case LibFunc_fabsf:
  1883. if (TLI->has(Func)) {
  1884. U.clearSign();
  1885. return ConstantFP::get(Ty->getContext(), U);
  1886. }
  1887. break;
  1888. case LibFunc_floor:
  1889. case LibFunc_floorf:
  1890. if (TLI->has(Func)) {
  1891. U.roundToIntegral(APFloat::rmTowardNegative);
  1892. return ConstantFP::get(Ty->getContext(), U);
  1893. }
  1894. break;
  1895. case LibFunc_log:
  1896. case LibFunc_logf:
  1897. case LibFunc_log_finite:
  1898. case LibFunc_logf_finite:
  1899. if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
  1900. return ConstantFoldFP(log, APF, Ty);
  1901. break;
  1902. case LibFunc_log2:
  1903. case LibFunc_log2f:
  1904. case LibFunc_log2_finite:
  1905. case LibFunc_log2f_finite:
  1906. if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
  1907. // TODO: What about hosts that lack a C99 library?
  1908. return ConstantFoldFP(Log2, APF, Ty);
  1909. break;
  1910. case LibFunc_log10:
  1911. case LibFunc_log10f:
  1912. case LibFunc_log10_finite:
  1913. case LibFunc_log10f_finite:
  1914. if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
  1915. // TODO: What about hosts that lack a C99 library?
  1916. return ConstantFoldFP(log10, APF, Ty);
  1917. break;
  1918. case LibFunc_nearbyint:
  1919. case LibFunc_nearbyintf:
  1920. case LibFunc_rint:
  1921. case LibFunc_rintf:
  1922. if (TLI->has(Func)) {
  1923. U.roundToIntegral(APFloat::rmNearestTiesToEven);
  1924. return ConstantFP::get(Ty->getContext(), U);
  1925. }
  1926. break;
  1927. case LibFunc_round:
  1928. case LibFunc_roundf:
  1929. if (TLI->has(Func)) {
  1930. U.roundToIntegral(APFloat::rmNearestTiesToAway);
  1931. return ConstantFP::get(Ty->getContext(), U);
  1932. }
  1933. break;
  1934. case LibFunc_sin:
  1935. case LibFunc_sinf:
  1936. if (TLI->has(Func))
  1937. return ConstantFoldFP(sin, APF, Ty);
  1938. break;
  1939. case LibFunc_sinh:
  1940. case LibFunc_sinhf:
  1941. case LibFunc_sinh_finite:
  1942. case LibFunc_sinhf_finite:
  1943. if (TLI->has(Func))
  1944. return ConstantFoldFP(sinh, APF, Ty);
  1945. break;
  1946. case LibFunc_sqrt:
  1947. case LibFunc_sqrtf:
  1948. if (!APF.isNegative() && TLI->has(Func))
  1949. return ConstantFoldFP(sqrt, APF, Ty);
  1950. break;
  1951. case LibFunc_tan:
  1952. case LibFunc_tanf:
  1953. if (TLI->has(Func))
  1954. return ConstantFoldFP(tan, APF, Ty);
  1955. break;
  1956. case LibFunc_tanh:
  1957. case LibFunc_tanhf:
  1958. if (TLI->has(Func))
  1959. return ConstantFoldFP(tanh, APF, Ty);
  1960. break;
  1961. case LibFunc_trunc:
  1962. case LibFunc_truncf:
  1963. if (TLI->has(Func)) {
  1964. U.roundToIntegral(APFloat::rmTowardZero);
  1965. return ConstantFP::get(Ty->getContext(), U);
  1966. }
  1967. break;
  1968. }
  1969. return nullptr;
  1970. }
  1971. if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
  1972. switch (IntrinsicID) {
  1973. case Intrinsic::bswap:
  1974. return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
  1975. case Intrinsic::ctpop:
  1976. return ConstantInt::get(Ty, Op->getValue().countPopulation());
  1977. case Intrinsic::bitreverse:
  1978. return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
  1979. case Intrinsic::convert_from_fp16: {
  1980. APFloat Val(APFloat::IEEEhalf(), Op->getValue());
  1981. bool lost = false;
  1982. APFloat::opStatus status = Val.convert(
  1983. Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
  1984. // Conversion is always precise.
  1985. (void)status;
  1986. assert(status == APFloat::opOK && !lost &&
  1987. "Precision lost during fp16 constfolding");
  1988. return ConstantFP::get(Ty->getContext(), Val);
  1989. }
  1990. default:
  1991. return nullptr;
  1992. }
  1993. }
  1994. switch (IntrinsicID) {
  1995. default: break;
  1996. case Intrinsic::vector_reduce_add:
  1997. case Intrinsic::vector_reduce_mul:
  1998. case Intrinsic::vector_reduce_and:
  1999. case Intrinsic::vector_reduce_or:
  2000. case Intrinsic::vector_reduce_xor:
  2001. case Intrinsic::vector_reduce_smin:
  2002. case Intrinsic::vector_reduce_smax:
  2003. case Intrinsic::vector_reduce_umin:
  2004. case Intrinsic::vector_reduce_umax:
  2005. if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0]))
  2006. return C;
  2007. break;
  2008. }
2009. // Also support ConstantVector, since a vector containing undef elements is not a ConstantDataVector.
  2010. if (isa<ConstantVector>(Operands[0]) ||
  2011. isa<ConstantDataVector>(Operands[0])) {
  2012. auto *Op = cast<Constant>(Operands[0]);
  2013. switch (IntrinsicID) {
  2014. default: break;
  2015. case Intrinsic::x86_sse_cvtss2si:
  2016. case Intrinsic::x86_sse_cvtss2si64:
  2017. case Intrinsic::x86_sse2_cvtsd2si:
  2018. case Intrinsic::x86_sse2_cvtsd2si64:
  2019. if (ConstantFP *FPOp =
  2020. dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
  2021. return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
  2022. /*roundTowardZero=*/false, Ty,
  2023. /*IsSigned*/true);
  2024. break;
  2025. case Intrinsic::x86_sse_cvttss2si:
  2026. case Intrinsic::x86_sse_cvttss2si64:
  2027. case Intrinsic::x86_sse2_cvttsd2si:
  2028. case Intrinsic::x86_sse2_cvttsd2si64:
  2029. if (ConstantFP *FPOp =
  2030. dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
  2031. return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
  2032. /*roundTowardZero=*/true, Ty,
  2033. /*IsSigned*/true);
  2034. break;
  2035. }
  2036. }
  2037. return nullptr;
  2038. }
  2039. static Constant *ConstantFoldScalarCall2(StringRef Name,
  2040. Intrinsic::ID IntrinsicID,
  2041. Type *Ty,
  2042. ArrayRef<Constant *> Operands,
  2043. const TargetLibraryInfo *TLI,
  2044. const CallBase *Call) {
  2045. assert(Operands.size() == 2 && "Wrong number of operands.");
  2046. if (Ty->isFloatingPointTy()) {
  2047. // TODO: We should have undef handling for all of the FP intrinsics that
  2048. // are attempted to be folded in this function.
  2049. bool IsOp0Undef = isa<UndefValue>(Operands[0]);
  2050. bool IsOp1Undef = isa<UndefValue>(Operands[1]);
  2051. switch (IntrinsicID) {
  2052. case Intrinsic::maxnum:
  2053. case Intrinsic::minnum:
  2054. case Intrinsic::maximum:
  2055. case Intrinsic::minimum:
  2056. // If one argument is undef, return the other argument.
  2057. if (IsOp0Undef)
  2058. return Operands[1];
  2059. if (IsOp1Undef)
  2060. return Operands[0];
  2061. break;
  2062. }
  2063. }
  2064. if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
  2065. if (!Ty->isFloatingPointTy())
  2066. return nullptr;
  2067. const APFloat &Op1V = Op1->getValueAPF();
  2068. if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
  2069. if (Op2->getType() != Op1->getType())
  2070. return nullptr;
  2071. const APFloat &Op2V = Op2->getValueAPF();
  2072. if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
  2073. RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
  2074. APFloat Res = Op1V;
  2075. APFloat::opStatus St;
  2076. switch (IntrinsicID) {
  2077. default:
  2078. return nullptr;
  2079. case Intrinsic::experimental_constrained_fadd:
  2080. St = Res.add(Op2V, RM);
  2081. break;
  2082. case Intrinsic::experimental_constrained_fsub:
  2083. St = Res.subtract(Op2V, RM);
  2084. break;
  2085. case Intrinsic::experimental_constrained_fmul:
  2086. St = Res.multiply(Op2V, RM);
  2087. break;
  2088. case Intrinsic::experimental_constrained_fdiv:
  2089. St = Res.divide(Op2V, RM);
  2090. break;
  2091. case Intrinsic::experimental_constrained_frem:
  2092. St = Res.mod(Op2V);
  2093. break;
  2094. }
  2095. if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
  2096. St))
  2097. return ConstantFP::get(Ty->getContext(), Res);
  2098. return nullptr;
  2099. }
  2100. switch (IntrinsicID) {
  2101. default:
  2102. break;
  2103. case Intrinsic::copysign:
  2104. return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V));
  2105. case Intrinsic::minnum:
  2106. return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V));
  2107. case Intrinsic::maxnum:
  2108. return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V));
  2109. case Intrinsic::minimum:
  2110. return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));
  2111. case Intrinsic::maximum:
  2112. return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));
  2113. }
  2114. if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
  2115. return nullptr;
  2116. switch (IntrinsicID) {
  2117. default:
  2118. break;
  2119. case Intrinsic::pow:
  2120. return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
  2121. case Intrinsic::amdgcn_fmul_legacy:
  2122. // The legacy behaviour is that multiplying +/- 0.0 by anything, even
  2123. // NaN or infinity, gives +0.0.
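// Illustrative example (hypothetical): amdgcn.fmul.legacy(+0.0, NaN)
// folds to +0.0 rather than NaN.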
  2124. if (Op1V.isZero() || Op2V.isZero())
  2125. return ConstantFP::getNullValue(Ty);
  2126. return ConstantFP::get(Ty->getContext(), Op1V * Op2V);
  2127. }
  2128. if (!TLI)
  2129. return nullptr;
  2130. LibFunc Func = NotLibFunc;
  2131. if (!TLI->getLibFunc(Name, Func))
  2132. return nullptr;
  2133. switch (Func) {
  2134. default:
  2135. break;
  2136. case LibFunc_pow:
  2137. case LibFunc_powf:
  2138. case LibFunc_pow_finite:
  2139. case LibFunc_powf_finite:
  2140. if (TLI->has(Func))
  2141. return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
  2142. break;
  2143. case LibFunc_fmod:
  2144. case LibFunc_fmodf:
  2145. if (TLI->has(Func)) {
  2146. APFloat V = Op1->getValueAPF();
  2147. if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
  2148. return ConstantFP::get(Ty->getContext(), V);
  2149. }
  2150. break;
  2151. case LibFunc_remainder:
  2152. case LibFunc_remainderf:
  2153. if (TLI->has(Func)) {
  2154. APFloat V = Op1->getValueAPF();
  2155. if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))
  2156. return ConstantFP::get(Ty->getContext(), V);
  2157. }
  2158. break;
  2159. case LibFunc_atan2:
  2160. case LibFunc_atan2f:
  2161. case LibFunc_atan2_finite:
  2162. case LibFunc_atan2f_finite:
  2163. if (TLI->has(Func))
  2164. return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
  2165. break;
  2166. }
  2167. } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
  2168. if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
  2169. return nullptr;
  2170. if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
  2171. return ConstantFP::get(
  2172. Ty->getContext(),
  2173. APFloat((float)std::pow((float)Op1V.convertToDouble(),
  2174. (int)Op2C->getZExtValue())));
  2175. if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
  2176. return ConstantFP::get(
  2177. Ty->getContext(),
  2178. APFloat((float)std::pow((float)Op1V.convertToDouble(),
  2179. (int)Op2C->getZExtValue())));
  2180. if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
  2181. return ConstantFP::get(
  2182. Ty->getContext(),
  2183. APFloat((double)std::pow(Op1V.convertToDouble(),
  2184. (int)Op2C->getZExtValue())));
  2185. if (IntrinsicID == Intrinsic::amdgcn_ldexp) {
  2186. // FIXME: Should flush denorms depending on FP mode, but that's ignored
  2187. // everywhere else.
  2188. // scalbn is equivalent to ldexp with float radix 2
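// Illustrative example (hypothetical): amdgcn.ldexp(1.5, 3) folds to
// 12.0, i.e. 1.5 * 2^3.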
  2189. APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(),
  2190. APFloat::rmNearestTiesToEven);
  2191. return ConstantFP::get(Ty->getContext(), Result);
  2192. }
  2193. }
  2194. return nullptr;
  2195. }
  2196. if (Operands[0]->getType()->isIntegerTy() &&
  2197. Operands[1]->getType()->isIntegerTy()) {
  2198. const APInt *C0, *C1;
  2199. if (!getConstIntOrUndef(Operands[0], C0) ||
  2200. !getConstIntOrUndef(Operands[1], C1))
  2201. return nullptr;
  2202. switch (IntrinsicID) {
  2203. default: break;
  2204. case Intrinsic::smax:
  2205. case Intrinsic::smin:
  2206. case Intrinsic::umax:
  2207. case Intrinsic::umin:
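// With both operands undef, the result is undef. With one undef operand,
// fold to the operation's saturation point, e.g. umax(x, undef) becomes
// UINT_MAX by choosing undef to be UINT_MAX.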
  2208. if (!C0 && !C1)
  2209. return UndefValue::get(Ty);
  2210. if (!C0 || !C1)
  2211. return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty);
  2212. return ConstantInt::get(
  2213. Ty, ICmpInst::compare(*C0, *C1,
  2214. MinMaxIntrinsic::getPredicate(IntrinsicID))
  2215. ? *C0
  2216. : *C1);
  2217. case Intrinsic::usub_with_overflow:
  2218. case Intrinsic::ssub_with_overflow:
  2219. // X - undef -> { 0, false }
  2220. // undef - X -> { 0, false }
  2221. if (!C0 || !C1)
  2222. return Constant::getNullValue(Ty);
  2223. LLVM_FALLTHROUGH;
  2224. case Intrinsic::uadd_with_overflow:
  2225. case Intrinsic::sadd_with_overflow:
  2226. // X + undef -> { -1, false }
  2227. // undef + x -> { -1, false }
  2228. if (!C0 || !C1) {
  2229. return ConstantStruct::get(
  2230. cast<StructType>(Ty),
  2231. {Constant::getAllOnesValue(Ty->getStructElementType(0)),
  2232. Constant::getNullValue(Ty->getStructElementType(1))});
  2233. }
  2234. LLVM_FALLTHROUGH;
  2235. case Intrinsic::smul_with_overflow:
  2236. case Intrinsic::umul_with_overflow: {
  2237. // undef * X -> { 0, false }
  2238. // X * undef -> { 0, false }
  2239. if (!C0 || !C1)
  2240. return Constant::getNullValue(Ty);
  2241. APInt Res;
  2242. bool Overflow;
  2243. switch (IntrinsicID) {
  2244. default: llvm_unreachable("Invalid case");
  2245. case Intrinsic::sadd_with_overflow:
  2246. Res = C0->sadd_ov(*C1, Overflow);
  2247. break;
  2248. case Intrinsic::uadd_with_overflow:
  2249. Res = C0->uadd_ov(*C1, Overflow);
  2250. break;
  2251. case Intrinsic::ssub_with_overflow:
  2252. Res = C0->ssub_ov(*C1, Overflow);
  2253. break;
  2254. case Intrinsic::usub_with_overflow:
  2255. Res = C0->usub_ov(*C1, Overflow);
  2256. break;
  2257. case Intrinsic::smul_with_overflow:
  2258. Res = C0->smul_ov(*C1, Overflow);
  2259. break;
  2260. case Intrinsic::umul_with_overflow:
  2261. Res = C0->umul_ov(*C1, Overflow);
  2262. break;
  2263. }
  2264. Constant *Ops[] = {
  2265. ConstantInt::get(Ty->getContext(), Res),
  2266. ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
  2267. };
  2268. return ConstantStruct::get(cast<StructType>(Ty), Ops);
  2269. }
  2270. case Intrinsic::uadd_sat:
  2271. case Intrinsic::sadd_sat:
  2272. if (!C0 && !C1)
  2273. return UndefValue::get(Ty);
  2274. if (!C0 || !C1)
  2275. return Constant::getAllOnesValue(Ty);
  2276. if (IntrinsicID == Intrinsic::uadd_sat)
  2277. return ConstantInt::get(Ty, C0->uadd_sat(*C1));
  2278. else
  2279. return ConstantInt::get(Ty, C0->sadd_sat(*C1));
  2280. case Intrinsic::usub_sat:
  2281. case Intrinsic::ssub_sat:
  2282. if (!C0 && !C1)
  2283. return UndefValue::get(Ty);
  2284. if (!C0 || !C1)
  2285. return Constant::getNullValue(Ty);
  2286. if (IntrinsicID == Intrinsic::usub_sat)
  2287. return ConstantInt::get(Ty, C0->usub_sat(*C1));
  2288. else
  2289. return ConstantInt::get(Ty, C0->ssub_sat(*C1));
  2290. case Intrinsic::cttz:
  2291. case Intrinsic::ctlz:
  2292. assert(C1 && "Must be constant int");
  2293. // cttz(0, 1) and ctlz(0, 1) are poison.
  2294. if (C1->isOne() && (!C0 || C0->isZero()))
  2295. return PoisonValue::get(Ty);
  2296. if (!C0)
  2297. return Constant::getNullValue(Ty);
  2298. if (IntrinsicID == Intrinsic::cttz)
  2299. return ConstantInt::get(Ty, C0->countTrailingZeros());
  2300. else
  2301. return ConstantInt::get(Ty, C0->countLeadingZeros());
  2302. case Intrinsic::abs:
  2303. assert(C1 && "Must be constant int");
  2304. assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1");
  2305. // Undef or minimum val operand with poison min --> undef
  2306. if (C1->isOne() && (!C0 || C0->isMinSignedValue()))
  2307. return UndefValue::get(Ty);
  2308. // Undef operand with no poison min --> 0 (sign bit must be clear)
  2309. if (!C0)
  2310. return Constant::getNullValue(Ty);
  2311. return ConstantInt::get(Ty, C0->abs());
  2312. }
  2313. return nullptr;
  2314. }
2315. // Also support ConstantVector, since a vector containing undef elements is not a ConstantDataVector.
  2316. if ((isa<ConstantVector>(Operands[0]) ||
  2317. isa<ConstantDataVector>(Operands[0])) &&
  2318. // Check for default rounding mode.
  2319. // FIXME: Support other rounding modes?
  2320. isa<ConstantInt>(Operands[1]) &&
  2321. cast<ConstantInt>(Operands[1])->getValue() == 4) {
  2322. auto *Op = cast<Constant>(Operands[0]);
  2323. switch (IntrinsicID) {
  2324. default: break;
  2325. case Intrinsic::x86_avx512_vcvtss2si32:
  2326. case Intrinsic::x86_avx512_vcvtss2si64:
  2327. case Intrinsic::x86_avx512_vcvtsd2si32:
  2328. case Intrinsic::x86_avx512_vcvtsd2si64:
  2329. if (ConstantFP *FPOp =
  2330. dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
  2331. return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
  2332. /*roundTowardZero=*/false, Ty,
  2333. /*IsSigned*/true);
  2334. break;
  2335. case Intrinsic::x86_avx512_vcvtss2usi32:
  2336. case Intrinsic::x86_avx512_vcvtss2usi64:
  2337. case Intrinsic::x86_avx512_vcvtsd2usi32:
  2338. case Intrinsic::x86_avx512_vcvtsd2usi64:
  2339. if (ConstantFP *FPOp =
  2340. dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
  2341. return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
  2342. /*roundTowardZero=*/false, Ty,
  2343. /*IsSigned*/false);
  2344. break;
  2345. case Intrinsic::x86_avx512_cvttss2si:
  2346. case Intrinsic::x86_avx512_cvttss2si64:
  2347. case Intrinsic::x86_avx512_cvttsd2si:
  2348. case Intrinsic::x86_avx512_cvttsd2si64:
  2349. if (ConstantFP *FPOp =
  2350. dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
  2351. return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
  2352. /*roundTowardZero=*/true, Ty,
  2353. /*IsSigned*/true);
  2354. break;
  2355. case Intrinsic::x86_avx512_cvttss2usi:
  2356. case Intrinsic::x86_avx512_cvttss2usi64:
  2357. case Intrinsic::x86_avx512_cvttsd2usi:
  2358. case Intrinsic::x86_avx512_cvttsd2usi64:
  2359. if (ConstantFP *FPOp =
  2360. dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
  2361. return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
  2362. /*roundTowardZero=*/true, Ty,
  2363. /*IsSigned*/false);
  2364. break;
  2365. }
  2366. }
  2367. return nullptr;
  2368. }
static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
                                               const APFloat &S0,
                                               const APFloat &S1,
                                               const APFloat &S2) {
  unsigned ID;
  const fltSemantics &Sem = S0.getSemantics();
  APFloat MA(Sem), SC(Sem), TC(Sem);
  if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) {
    if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {
      // S2 < 0
      ID = 5;
      SC = -S0;
    } else {
      ID = 4;
      SC = S0;
    }
    MA = S2;
    TC = -S1;
  } else if (abs(S1) >= abs(S0)) {
    if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {
      // S1 < 0
      ID = 3;
      TC = -S2;
    } else {
      ID = 2;
      TC = S2;
    }
    MA = S1;
    SC = S0;
  } else {
    if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {
      // S0 < 0
      ID = 1;
      SC = S2;
    } else {
      ID = 0;
      SC = -S2;
    }
    MA = S0;
    TC = -S1;
  }

  switch (IntrinsicID) {
  default:
    llvm_unreachable("unhandled amdgcn cube intrinsic");
  case Intrinsic::amdgcn_cubeid:
    return APFloat(Sem, ID);
  case Intrinsic::amdgcn_cubema:
    return MA + MA;
  case Intrinsic::amdgcn_cubesc:
    return SC;
  case Intrinsic::amdgcn_cubetc:
    return TC;
  }
}
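
// Constant fold llvm.amdgcn.perm: the third operand is a per-byte selector
// that assembles the 32-bit result one byte at a time from the first two
// operands, from a replicated sign bit, or from the constants 0x00 / 0xff.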
static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
                                                 Type *Ty) {
  const APInt *C0, *C1, *C2;
  if (!getConstIntOrUndef(Operands[0], C0) ||
      !getConstIntOrUndef(Operands[1], C1) ||
      !getConstIntOrUndef(Operands[2], C2))
    return nullptr;

  if (!C2)
    return UndefValue::get(Ty);

  APInt Val(32, 0);
  unsigned NumUndefBytes = 0;
  for (unsigned I = 0; I < 32; I += 8) {
    unsigned Sel = C2->extractBitsAsZExtValue(8, I);
    unsigned B = 0;

    if (Sel >= 13)
      B = 0xff;
    else if (Sel == 12)
      B = 0x00;
    else {
      const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
      if (!Src)
        ++NumUndefBytes;
      else if (Sel < 8)
        B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);
      else
        B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;
    }

    Val.insertBits(B, I, 8);
  }

  if (NumUndefBytes == 4)
    return UndefValue::get(Ty);

  return ConstantInt::get(Ty, Val);
}
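
// Fold intrinsics that take exactly three scalar operands, e.g. fma, fmuladd,
// the fixed-point multiplies and the funnel shifts.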
static Constant *ConstantFoldScalarCall3(StringRef Name,
                                         Intrinsic::ID IntrinsicID,
                                         Type *Ty,
                                         ArrayRef<Constant *> Operands,
                                         const TargetLibraryInfo *TLI,
                                         const CallBase *Call) {
  assert(Operands.size() == 3 && "Wrong number of operands.");

  if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
    if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
      if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
        const APFloat &C1 = Op1->getValueAPF();
        const APFloat &C2 = Op2->getValueAPF();
        const APFloat &C3 = Op3->getValueAPF();

        if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
          RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
          APFloat Res = C1;
          APFloat::opStatus St;
          switch (IntrinsicID) {
          default:
            return nullptr;
          case Intrinsic::experimental_constrained_fma:
          case Intrinsic::experimental_constrained_fmuladd:
            St = Res.fusedMultiplyAdd(C2, C3, RM);
            break;
          }
          if (mayFoldConstrained(
                  const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
            return ConstantFP::get(Ty->getContext(), Res);
          return nullptr;
        }

        switch (IntrinsicID) {
        default: break;
        case Intrinsic::amdgcn_fma_legacy: {
          // The legacy behaviour is that multiplying +/- 0.0 by anything, even
          // NaN or infinity, gives +0.0.
          if (C1.isZero() || C2.isZero()) {
            // It's tempting to just return C3 here, but that would give the
            // wrong result if C3 was -0.0.
            return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);
          }
          LLVM_FALLTHROUGH;
        }
        case Intrinsic::fma:
        case Intrinsic::fmuladd: {
          APFloat V = C1;
          V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven);
          return ConstantFP::get(Ty->getContext(), V);
        }
        case Intrinsic::amdgcn_cubeid:
        case Intrinsic::amdgcn_cubema:
        case Intrinsic::amdgcn_cubesc:
        case Intrinsic::amdgcn_cubetc: {
          APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3);
          return ConstantFP::get(Ty->getContext(), V);
        }
        }
      }
    }
  }
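
  // llvm.smul.fix multiplies two fixed-point values that have Scale
  // fractional bits; for example, with Scale == 1, smul_fix(i32 3, i32 2, 1)
  // represents 1.5 * 1.0 and folds to (3 * 2) >> 1 = 3, i.e. 1.5.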
  if (IntrinsicID == Intrinsic::smul_fix ||
      IntrinsicID == Intrinsic::smul_fix_sat) {
    // poison * C -> poison
    // C * poison -> poison
    if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
      return PoisonValue::get(Ty);

    const APInt *C0, *C1;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1))
      return nullptr;

    // undef * C -> 0
    // C * undef -> 0
    if (!C0 || !C1)
      return Constant::getNullValue(Ty);

    // This code performs rounding towards negative infinity in case the result
    // cannot be represented exactly for the given scale. Targets that do care
    // about rounding should use a target hook for specifying how rounding
    // should be done, and provide their own folding to be consistent with
    // rounding. This is the same approach as used by
    // DAGTypeLegalizer::ExpandIntRes_MULFIX.
    unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue();
    unsigned Width = C0->getBitWidth();
    assert(Scale < Width && "Illegal scale.");
    unsigned ExtendedWidth = Width * 2;
    APInt Product = (C0->sextOrSelf(ExtendedWidth) *
                     C1->sextOrSelf(ExtendedWidth)).ashr(Scale);
    if (IntrinsicID == Intrinsic::smul_fix_sat) {
      APInt Max = APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
      APInt Min = APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
      Product = APIntOps::smin(Product, Max);
      Product = APIntOps::smax(Product, Min);
    }
    return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width));
  }
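
  // A funnel shift concatenates its first two operands and shifts by the
  // third operand modulo the bit width, e.g. fshl(i8 0xFF, i8 0x00, 4)
  // folds to (0xFF << 4) | (0x00 >> 4) = 0xF0.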
  if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
    const APInt *C0, *C1, *C2;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1) ||
        !getConstIntOrUndef(Operands[2], C2))
      return nullptr;

    bool IsRight = IntrinsicID == Intrinsic::fshr;
    if (!C2)
      return Operands[IsRight ? 1 : 0];
    if (!C0 && !C1)
      return UndefValue::get(Ty);

    // The shift amount is interpreted as modulo the bitwidth. If the shift
    // amount is effectively 0, avoid UB due to oversized inverse shift below.
    unsigned BitWidth = C2->getBitWidth();
    unsigned ShAmt = C2->urem(BitWidth);
    if (!ShAmt)
      return Operands[IsRight ? 1 : 0];

    // (C0 << ShlAmt) | (C1 >> LshrAmt)
    unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
    unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
    if (!C0)
      return ConstantInt::get(Ty, C1->lshr(LshrAmt));
    if (!C1)
      return ConstantInt::get(Ty, C0->shl(ShlAmt));
    return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
  }

  if (IntrinsicID == Intrinsic::amdgcn_perm)
    return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);

  return nullptr;
}
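
// Fold a scalar intrinsic or library call by dispatching on the number of
// constant operands.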
static Constant *ConstantFoldScalarCall(StringRef Name,
                                        Intrinsic::ID IntrinsicID,
                                        Type *Ty,
                                        ArrayRef<Constant *> Operands,
                                        const TargetLibraryInfo *TLI,
                                        const CallBase *Call) {
  if (Operands.size() == 1)
    return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);

  if (Operands.size() == 2)
    return ConstantFoldScalarCall2(Name, IntrinsicID, Ty, Operands, TLI, Call);

  if (Operands.size() == 3)
    return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);

  return nullptr;
}
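
// Fold a call that returns a fixed-width vector. A few intrinsics are handled
// as whole-vector operations; everything else is folded one lane at a time
// using the scalar folding above.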
static Constant *ConstantFoldFixedVectorCall(
    StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy,
    ArrayRef<Constant *> Operands, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const CallBase *Call) {
  SmallVector<Constant *, 4> Result(FVTy->getNumElements());
  SmallVector<Constant *, 4> Lane(Operands.size());
  Type *Ty = FVTy->getElementType();

  switch (IntrinsicID) {
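  // A masked load of constant memory folds element-wise: take the loaded
  // element where the mask is known true and the passthru element where it
  // is known false, and give up if either is unavailable.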
  case Intrinsic::masked_load: {
    auto *SrcPtr = Operands[0];
    auto *Mask = Operands[2];
    auto *Passthru = Operands[3];

    Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);

    SmallVector<Constant *, 32> NewElements;
    for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
      auto *MaskElt = Mask->getAggregateElement(I);
      if (!MaskElt)
        break;
      auto *PassthruElt = Passthru->getAggregateElement(I);
      auto *VecElt = VecData ? VecData->getAggregateElement(I) : nullptr;
      if (isa<UndefValue>(MaskElt)) {
        if (PassthruElt)
          NewElements.push_back(PassthruElt);
        else if (VecElt)
          NewElements.push_back(VecElt);
        else
          return nullptr;
      }
      if (MaskElt->isNullValue()) {
        if (!PassthruElt)
          return nullptr;
        NewElements.push_back(PassthruElt);
      } else if (MaskElt->isOneValue()) {
        if (!VecElt)
          return nullptr;
        NewElements.push_back(VecElt);
      } else {
        return nullptr;
      }
    }
    if (NewElements.size() != FVTy->getNumElements())
      return nullptr;
    return ConstantVector::get(NewElements);
  }
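  // The MVE VCTP intrinsics produce a predicate in which the low N lanes are
  // true, where N is the value of the scalar operand.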
  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64: {
    if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
      unsigned Lanes = FVTy->getNumElements();
      uint64_t Limit = Op->getZExtValue();

      SmallVector<Constant *, 16> NCs;
      for (unsigned i = 0; i < Lanes; i++) {
        if (i < Limit)
          NCs.push_back(ConstantInt::getTrue(Ty));
        else
          NCs.push_back(ConstantInt::getFalse(Ty));
      }
      return ConstantVector::get(NCs);
    }
    break;
  }
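  // get.active.lane.mask(Base, Limit) folds to a predicate that is true in
  // lane I exactly when Base + I < Limit.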
  case Intrinsic::get_active_lane_mask: {
    auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
    auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
    if (Op0 && Op1) {
      unsigned Lanes = FVTy->getNumElements();
      uint64_t Base = Op0->getZExtValue();
      uint64_t Limit = Op1->getZExtValue();

      SmallVector<Constant *, 16> NCs;
      for (unsigned i = 0; i < Lanes; i++) {
        if (Base + i < Limit)
          NCs.push_back(ConstantInt::getTrue(Ty));
        else
          NCs.push_back(ConstantInt::getFalse(Ty));
      }
      return ConstantVector::get(NCs);
    }
    break;
  }
  default:
    break;
  }
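
  // Otherwise fold lane by lane: gather the I-th element of every operand
  // (passing scalar operands through unchanged), fold the resulting scalar
  // call, and give up if any lane fails to fold.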
  for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
    // Gather a column of constants.
    for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
      // Some intrinsics use a scalar type for certain arguments.
      if (hasVectorInstrinsicScalarOpd(IntrinsicID, J)) {
        Lane[J] = Operands[J];
        continue;
      }

      Constant *Agg = Operands[J]->getAggregateElement(I);
      if (!Agg)
        return nullptr;

      Lane[J] = Agg;
    }

    // Use the regular scalar folding to simplify this column.
    Constant *Folded =
        ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
    if (!Folded)
      return nullptr;
    Result[I] = Folded;
  }

  return ConstantVector::get(Result);
}
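
// Scalable vectors cannot be folded lane by lane because the element count is
// not known at compile time, so only a small set of whole-vector folds is
// attempted here.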
static Constant *ConstantFoldScalableVectorCall(
    StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,
    ArrayRef<Constant *> Operands, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const CallBase *Call) {
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_convert_from_svbool: {
    auto *Src = dyn_cast<Constant>(Operands[0]);
    if (!Src || !Src->isNullValue())
      break;

    return ConstantInt::getFalse(SVTy);
  }
  default:
    break;
  }
  return nullptr;
}

} // end anonymous namespace

Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
                                 ArrayRef<Constant *> Operands,
                                 const TargetLibraryInfo *TLI) {
  if (Call->isNoBuiltin())
    return nullptr;
  if (!F->hasName())
    return nullptr;

  // If this is not an intrinsic and not recognized as a library call, bail out.
  if (F->getIntrinsicID() == Intrinsic::not_intrinsic) {
    if (!TLI)
      return nullptr;
    LibFunc LibF;
    if (!TLI->getLibFunc(*F, LibF))
      return nullptr;
  }

  StringRef Name = F->getName();
  Type *Ty = F->getReturnType();
  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
    return ConstantFoldFixedVectorCall(
        Name, F->getIntrinsicID(), FVTy, Operands,
        F->getParent()->getDataLayout(), TLI, Call);

  if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))
    return ConstantFoldScalableVectorCall(
        Name, F->getIntrinsicID(), SVTy, Operands,
        F->getParent()->getDataLayout(), TLI, Call);

  // TODO: If this is a library function, we already discovered that above,
  // so we should pass the LibFunc, not the name (and it might be better
  // still to separate intrinsic handling from libcalls).
  return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI,
                                Call);
}
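
// Return true if a libm call with the given constant argument(s) is known to
// have no side effects such as setting errno, so callers may treat it as
// removable when its result is unused.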
bool llvm::isMathLibCallNoop(const CallBase *Call,
                             const TargetLibraryInfo *TLI) {
  // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
  // (and to some extent ConstantFoldScalarCall).
  if (Call->isNoBuiltin() || Call->isStrictFP())
    return false;
  Function *F = Call->getCalledFunction();
  if (!F)
    return false;

  LibFunc Func;
  if (!TLI || !TLI->getLibFunc(*F, Func))
    return false;

  if (Call->arg_size() == 1) {
    if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
      const APFloat &Op = OpC->getValueAPF();
      switch (Func) {
      case LibFunc_logl:
      case LibFunc_log:
      case LibFunc_logf:
      case LibFunc_log2l:
      case LibFunc_log2:
      case LibFunc_log2f:
      case LibFunc_log10l:
      case LibFunc_log10:
      case LibFunc_log10f:
        return Op.isNaN() || (!Op.isZero() && !Op.isNegative());

      case LibFunc_expl:
      case LibFunc_exp:
      case LibFunc_expf:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
        break;

      case LibFunc_exp2l:
      case LibFunc_exp2:
      case LibFunc_exp2f:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
        break;

      case LibFunc_sinl:
      case LibFunc_sin:
      case LibFunc_sinf:
      case LibFunc_cosl:
      case LibFunc_cos:
      case LibFunc_cosf:
        return !Op.isInfinity();

      case LibFunc_tanl:
      case LibFunc_tan:
      case LibFunc_tanf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = OpC->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
          return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr;
        break;
      }

      case LibFunc_asinl:
      case LibFunc_asin:
      case LibFunc_asinf:
      case LibFunc_acosl:
      case LibFunc_acos:
      case LibFunc_acosf:
        return !(Op < APFloat(Op.getSemantics(), "-1") ||
                 Op > APFloat(Op.getSemantics(), "1"));

      case LibFunc_sinh:
      case LibFunc_cosh:
      case LibFunc_sinhf:
      case LibFunc_coshf:
      case LibFunc_sinhl:
      case LibFunc_coshl:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
        break;

      case LibFunc_sqrtl:
      case LibFunc_sqrt:
      case LibFunc_sqrtf:
        return Op.isNaN() || Op.isZero() || !Op.isNegative();

      // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
      // maybe others?
      default:
        break;
      }
    }
  }

  if (Call->arg_size() == 2) {
    ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
    ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
    if (Op0C && Op1C) {
      const APFloat &Op0 = Op0C->getValueAPF();
      const APFloat &Op1 = Op1C->getValueAPF();

      switch (Func) {
      case LibFunc_powl:
      case LibFunc_pow:
      case LibFunc_powf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = Op0C->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
          if (Ty == Op1C->getType())
            return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr;
        }
        break;
      }

      case LibFunc_fmodl:
      case LibFunc_fmod:
      case LibFunc_fmodf:
      case LibFunc_remainderl:
      case LibFunc_remainder:
      case LibFunc_remainderf:
        return Op0.isNaN() || Op1.isNaN() ||
               (!Op0.isInfinity() && !Op1.isZero());

      default:
        break;
      }
    }
  }

  return false;
}

void TargetFolder::anchor() {}