
//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines routines for folding instructions into constants.
//
// Also, to supplement the basic IR ConstantExpr simplifications,
// this file defines some additional folding routines that can make use of
// DataLayout information. These functions cannot go in IR due to library
// dependency issues.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantFold.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cerrno>
#include <cfenv>
#include <cmath>
#include <cstdint>

using namespace llvm;

namespace {

//===----------------------------------------------------------------------===//
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//

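/// Pack the integer elements of vector constant \p C into the wide integer
/// \p Result, placing them according to the endianness given by \p DL so that
/// element 0 lands in the low bits on little-endian targets. Undef elements
/// contribute zero bits. Returns nullptr on success, or a plain bitcast
/// ConstantExpr to \p DestTy if an element is not a constant integer.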
static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
                                        Constant *C, Type *SrcEltTy,
                                        unsigned NumSrcElts,
                                        const DataLayout &DL) {
  // Now that we know that the input value is a vector of integers, just shift
  // and insert them into our result.
  unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
  for (unsigned i = 0; i != NumSrcElts; ++i) {
    Constant *Element;
    if (DL.isLittleEndian())
      Element = C->getAggregateElement(NumSrcElts - i - 1);
    else
      Element = C->getAggregateElement(i);

    if (Element && isa<UndefValue>(Element)) {
      Result <<= BitShift;
      continue;
    }

    auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
    if (!ElementCI)
      return ConstantExpr::getBitCast(C, DestTy);

    Result <<= BitShift;
    Result |= ElementCI->getValue().zext(Result.getBitWidth());
  }

  return nullptr;
}

/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
  assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
         "Invalid constantexpr bitcast!");

  // Catch the obvious splat cases.
  if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy))
    return Res;

  if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
    // Handle a vector->scalar integer/fp cast.
    if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
      unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
      Type *SrcEltTy = VTy->getElementType();

      // If the vector is a vector of floating point, convert it to a vector
      // of int to simplify things.
      if (SrcEltTy->isFloatingPointTy()) {
        unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
        auto *SrcIVTy = FixedVectorType::get(
            IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
        // Ask IR to do the conversion now that #elts line up.
        C = ConstantExpr::getBitCast(C, SrcIVTy);
      }

      APInt Result(DL.getTypeSizeInBits(DestTy), 0);
      if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
                                                SrcEltTy, NumSrcElts, DL))
        return CE;

      if (isa<IntegerType>(DestTy))
        return ConstantInt::get(DestTy, Result);

      APFloat FP(DestTy->getFltSemantics(), Result);
      return ConstantFP::get(DestTy->getContext(), FP);
    }
  }

  // The code below only handles casts to vectors currently.
  auto *DestVTy = dyn_cast<VectorType>(DestTy);
  if (!DestVTy)
    return ConstantExpr::getBitCast(C, DestTy);

  // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
  // vector so the code below can handle it uniformly.
  if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
    Constant *Ops = C; // don't take the address of C!
    return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
  }

  // If this is a bitcast from constant vector -> vector, fold it.
  if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
    return ConstantExpr::getBitCast(C, DestTy);

  // If the element types match, IR can fold it.
  unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
  unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
  if (NumDstElt == NumSrcElt)
    return ConstantExpr::getBitCast(C, DestTy);

  Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
  Type *DstEltTy = DestVTy->getElementType();

  // Otherwise, we're changing the number of elements in a vector, which
  // requires endianness information to do the right thing. For example,
  //    bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  // folds to (little endian):
  //    <4 x i32> <i32 0, i32 0, i32 1, i32 0>
  // and to (big endian):
  //    <4 x i32> <i32 0, i32 0, i32 0, i32 1>

  // First things first: we only want to think about integers here, so if
  // we have something in FP form, recast it as integer.
  if (DstEltTy->isFloatingPointTy()) {
    // Fold to a vector of integers with the same size as our FP type.
    unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
    auto *DestIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumDstElt);
    // Recursively handle this integer conversion, if possible.
    C = FoldBitCast(C, DestIVTy, DL);

    // Finally, IR can handle this now that #elts line up.
    return ConstantExpr::getBitCast(C, DestTy);
  }

  // Okay, we know the destination is integer; if the input is FP, convert
  // it to integer first.
  if (SrcEltTy->isFloatingPointTy()) {
    unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
    auto *SrcIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
    // Ask IR to do the conversion now that #elts line up.
    C = ConstantExpr::getBitCast(C, SrcIVTy);

    // If IR wasn't able to fold it, bail out.
    if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector.
        !isa<ConstantDataVector>(C))
      return C;
  }

  // Now we know that the input and output vectors are both integer vectors
  // of the same size, and that their #elements is not the same. Do the
  // conversion here, which depends on whether the input or output has
  // more elements.
  bool isLittleEndian = DL.isLittleEndian();

  SmallVector<Constant*, 32> Result;
  if (NumDstElt < NumSrcElt) {
    // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
    Constant *Zero = Constant::getNullValue(DstEltTy);
    unsigned Ratio = NumSrcElt/NumDstElt;
    unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
    unsigned SrcElt = 0;
    for (unsigned i = 0; i != NumDstElt; ++i) {
      // Build each element of the result.
      Constant *Elt = Zero;
      unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
      for (unsigned j = 0; j != Ratio; ++j) {
        Constant *Src = C->getAggregateElement(SrcElt++);
        if (Src && isa<UndefValue>(Src))
          Src = Constant::getNullValue(
              cast<VectorType>(C->getType())->getElementType());
        else
          Src = dyn_cast_or_null<ConstantInt>(Src);
        if (!Src)  // Reject constantexpr elements.
          return ConstantExpr::getBitCast(C, DestTy);

        // Zero extend the element to the right size.
        Src = ConstantExpr::getZExt(Src, Elt->getType());

        // Shift it to the right place, depending on endianness.
        Src = ConstantExpr::getShl(Src,
                                   ConstantInt::get(Src->getType(), ShiftAmt));
        ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;

        // Mix it in.
        Elt = ConstantExpr::getOr(Elt, Src);
      }
      Result.push_back(Elt);
    }
    return ConstantVector::get(Result);
  }

  // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  unsigned Ratio = NumDstElt/NumSrcElt;
  unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);

  // Loop over each source value, expanding into multiple results.
  for (unsigned i = 0; i != NumSrcElt; ++i) {
    auto *Element = C->getAggregateElement(i);

    if (!Element) // Reject constantexpr elements.
      return ConstantExpr::getBitCast(C, DestTy);

    if (isa<UndefValue>(Element)) {
      // Correctly propagate undef values.
      Result.append(Ratio, UndefValue::get(DstEltTy));
      continue;
    }

    auto *Src = dyn_cast<ConstantInt>(Element);
    if (!Src)
      return ConstantExpr::getBitCast(C, DestTy);

    unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
    for (unsigned j = 0; j != Ratio; ++j) {
      // Shift the piece of the value into the right place, depending on
      // endianness.
      Constant *Elt = ConstantExpr::getLShr(
          Src, ConstantInt::get(Src->getType(), ShiftAmt));
      ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;

      // Truncate the element to an integer with the same pointer size and
      // convert the element back to a pointer using an inttoptr.
      if (DstEltTy->isPointerTy()) {
        IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize);
        Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy);
        Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy));
        continue;
      }

      // Truncate and remember this piece.
      Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
    }
  }

  return ConstantVector::get(Result);
}

} // end anonymous namespace

/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
                                      APInt &Offset, const DataLayout &DL,
                                      DSOLocalEquivalent **DSOEquiv) {
  if (DSOEquiv)
    *DSOEquiv = nullptr;

  // Trivial case, constant is the global.
  if ((GV = dyn_cast<GlobalValue>(C))) {
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
    if (DSOEquiv)
      *DSOEquiv = FoundDSOEquiv;
    GV = FoundDSOEquiv->getGlobalValue();
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  // Otherwise, if this isn't a constant expr, bail out.
  auto *CE = dyn_cast<ConstantExpr>(C);
  if (!CE) return false;

  // Look through ptr->int and ptr->ptr casts.
  if (CE->getOpcode() == Instruction::PtrToInt ||
      CE->getOpcode() == Instruction::BitCast)
    return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
                                      DSOEquiv);

  // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
  auto *GEP = dyn_cast<GEPOperator>(CE);
  if (!GEP)
    return false;

  unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
  APInt TmpOffset(BitWidth, 0);

  // If the base isn't a global+constant, we aren't either.
  if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
                                  DSOEquiv))
    return false;

  // Otherwise, add any offset that our operands provide.
  if (!GEP->accumulateConstantOffset(DL, TmpOffset))
    return false;

  Offset = TmpOffset;
  return true;
}

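/// Try to coerce constant \p C to type \p DestTy, as a load through a
/// bitcast pointer would: if a direct cast of the same size is not legal,
/// walk down through leading aggregate or vector elements looking for a
/// piece that can be cast. Returns null if no such coercion is possible.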
Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
                                               const DataLayout &DL) {
  do {
    Type *SrcTy = C->getType();
    if (SrcTy == DestTy)
      return C;

    TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
    TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
    if (!TypeSize::isKnownGE(SrcSize, DestSize))
      return nullptr;

    // Catch the obvious splat cases (since all-zeros can coerce non-integral
    // pointers legally).
    if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy))
      return Res;

    // If the type sizes are the same and a cast is legal, just directly
    // cast the constant.
    // But be careful not to coerce non-integral pointers illegally.
    if (SrcSize == DestSize &&
        DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
            DL.isNonIntegralPointerType(DestTy->getScalarType())) {
      Instruction::CastOps Cast = Instruction::BitCast;
      // If we are going from a pointer to int or vice versa, we spell the cast
      // differently.
      if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
        Cast = Instruction::IntToPtr;
      else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
        Cast = Instruction::PtrToInt;

      if (CastInst::castIsValid(Cast, C, DestTy))
        return ConstantExpr::getCast(Cast, C, DestTy);
    }

    // If this isn't an aggregate type, there is nothing we can do to drill
    // down and find a bitcastable constant.
    if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
      return nullptr;

    // We're simulating a load through a pointer that was bitcast to point to
    // a different type, so we can try to walk down through the initial
    // elements of an aggregate to see if some part of the aggregate is
    // castable to implement the "load" semantic model.
    if (SrcTy->isStructTy()) {
      // Struct types might have leading zero-length elements like [0 x i32],
      // which are certainly not what we are looking for, so skip them.
      unsigned Elem = 0;
      Constant *ElemC;
      do {
        ElemC = C->getAggregateElement(Elem++);
      } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
      C = ElemC;
    } else {
      // For non-byte-sized vector elements, the first element is not
      // necessarily located at the vector base address.
      if (auto *VT = dyn_cast<VectorType>(SrcTy))
        if (!DL.typeSizeEqualsStoreSize(VT->getElementType()))
          return nullptr;

      C = C->getAggregateElement(0u);
    }
  } while (C);

  return nullptr;
}

namespace {

/// Recursive helper to read bits out of global. C is the constant being copied
/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
/// results into and BytesLeft is the number of bytes left in
/// the CurPtr buffer. DL is the DataLayout.
bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
                        unsigned BytesLeft, const DataLayout &DL) {
  assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
         "Out of range access");

  // If this element is zero or undefined, we can just return since *CurPtr is
  // zero initialized.
  if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
    return true;

  if (auto *CI = dyn_cast<ConstantInt>(C)) {
    if (CI->getBitWidth() > 64 ||
        (CI->getBitWidth() & 7) != 0)
      return false;

    uint64_t Val = CI->getZExtValue();
    unsigned IntBytes = unsigned(CI->getBitWidth()/8);

    for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
      int n = ByteOffset;
      if (!DL.isLittleEndian())
        n = IntBytes - n - 1;
      CurPtr[i] = (unsigned char)(Val >> (n * 8));
      ++ByteOffset;
    }
    return true;
  }

  if (auto *CFP = dyn_cast<ConstantFP>(C)) {
    if (CFP->getType()->isDoubleTy()) {
      C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    if (CFP->getType()->isFloatTy()) {
      C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    if (CFP->getType()->isHalfTy()) {
      C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
      return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
    }
    return false;
  }

  if (auto *CS = dyn_cast<ConstantStruct>(C)) {
    const StructLayout *SL = DL.getStructLayout(CS->getType());
    unsigned Index = SL->getElementContainingOffset(ByteOffset);
    uint64_t CurEltOffset = SL->getElementOffset(Index);
    ByteOffset -= CurEltOffset;

    while (true) {
      // If the element access is to the element itself and not to tail
      // padding, read the bytes from the element.
      uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());

      if (ByteOffset < EltSize &&
          !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
                              BytesLeft, DL))
        return false;

      ++Index;

      // Check to see if we read from the last struct element; if so we're done.
      if (Index == CS->getType()->getNumElements())
        return true;

      // If we read all of the bytes we needed from this element we're done.
      uint64_t NextEltOffset = SL->getElementOffset(Index);

      if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
        return true;

      // Move to the next element of the struct.
      CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
      BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
      ByteOffset = 0;
      CurEltOffset = NextEltOffset;
    }
    // not reached.
  }

  if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
      isa<ConstantDataSequential>(C)) {
    uint64_t NumElts;
    Type *EltTy;
    if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
      NumElts = AT->getNumElements();
      EltTy = AT->getElementType();
    } else {
      NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
      EltTy = cast<FixedVectorType>(C->getType())->getElementType();
    }
    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
    uint64_t Index = ByteOffset / EltSize;
    uint64_t Offset = ByteOffset - Index * EltSize;
    for (; Index != NumElts; ++Index) {
      if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
                              BytesLeft, DL))
        return false;

      uint64_t BytesWritten = EltSize - Offset;
      assert(BytesWritten <= EltSize && "Not indexing into this element?");
      if (BytesWritten >= BytesLeft)
        return true;

      Offset = 0;
      BytesLeft -= BytesWritten;
      CurPtr += BytesWritten;
    }
    return true;
  }

  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
    if (CE->getOpcode() == Instruction::IntToPtr &&
        CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
      return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
                                BytesLeft, DL);
    }
  }

  // Otherwise, unknown initializer type.
  return false;
}

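/// Attempt to fold a load of \p LoadTy from constant \p C at byte offset
/// \p Offset by reading the underlying bytes of the initializer and
/// reassembling them as an integer of the loaded width; non-integer load
/// types are folded as an integer load and cast back. Returns null if the
/// bytes cannot be recovered.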
Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
                                       int64_t Offset, const DataLayout &DL) {
  // Bail out early. We do not expect to load from a scalable global variable.
  if (isa<ScalableVectorType>(LoadTy))
    return nullptr;

  auto *IntType = dyn_cast<IntegerType>(LoadTy);

  // If this isn't an integer load we can't fold it directly.
  if (!IntType) {
    // If this is a non-integer load, we can try folding it as an int load and
    // then bitcast the result. This can be useful for union cases. Note
    // that address spaces don't matter here since we're not going to result in
    // an actual new load.
    if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&
        !LoadTy->isVectorTy())
      return nullptr;

    Type *MapTy = Type::getIntNTy(C->getContext(),
                                  DL.getTypeSizeInBits(LoadTy).getFixedValue());
    if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
      if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
          !LoadTy->isX86_AMXTy())
        // Materializing a zero can be done trivially without a bitcast.
        return Constant::getNullValue(LoadTy);
      Type *CastTy =
          LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
      Res = FoldBitCast(Res, CastTy, DL);
      if (LoadTy->isPtrOrPtrVectorTy()) {
        // For vectors of pointers, we need to first convert to a vector of
        // integers and then do a vector inttoptr.
        if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
            !LoadTy->isX86_AMXTy())
          return Constant::getNullValue(LoadTy);
        if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
          // Be careful not to replace a load of an addrspace value with an
          // inttoptr here.
          return nullptr;
        Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy);
      }
      return Res;
    }
    return nullptr;
  }

  unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
  if (BytesLoaded > 32 || BytesLoaded == 0)
    return nullptr;

  // If we're not accessing anything in this constant, the result is undefined.
  if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
    return PoisonValue::get(IntType);

  // TODO: We should be able to support scalable types.
  TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
  if (InitializerSize.isScalable())
    return nullptr;

  // If we're not accessing anything in this constant, the result is undefined.
  if (Offset >= (int64_t)InitializerSize.getFixedValue())
    return PoisonValue::get(IntType);

  unsigned char RawBytes[32] = {0};
  unsigned char *CurPtr = RawBytes;
  unsigned BytesLeft = BytesLoaded;

  // If we're loading off the beginning of the global, some bytes may be valid.
  if (Offset < 0) {
    CurPtr += -Offset;
    BytesLeft += Offset;
    Offset = 0;
  }

  if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))
    return nullptr;

  APInt ResultVal = APInt(IntType->getBitWidth(), 0);
  if (DL.isLittleEndian()) {
    ResultVal = RawBytes[BytesLoaded - 1];
    for (unsigned i = 1; i != BytesLoaded; ++i) {
      ResultVal <<= 8;
      ResultVal |= RawBytes[BytesLoaded - 1 - i];
    }
  } else {
    ResultVal = RawBytes[0];
    for (unsigned i = 1; i != BytesLoaded; ++i) {
      ResultVal <<= 8;
      ResultVal |= RawBytes[i];
    }
  }

  return ConstantInt::get(IntType->getContext(), ResultVal);
}

} // anonymous namespace

// If GV is a constant with an initializer, read its representation starting
// at Offset and return it as a constant array of unsigned char. Otherwise
// return null.
Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV,
                                        uint64_t Offset) {
  if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
    return nullptr;

  const DataLayout &DL = GV->getParent()->getDataLayout();
  Constant *Init = const_cast<Constant *>(GV->getInitializer());
  TypeSize InitSize = DL.getTypeAllocSize(Init->getType());
  if (InitSize < Offset)
    return nullptr;

  uint64_t NBytes = InitSize - Offset;
  if (NBytes > UINT16_MAX)
    // Bail for large initializers in excess of 64K to avoid allocating
    // too much memory.
    // Offset is assumed to be less than or equal to InitSize (this
    // is enforced in ReadDataFromGlobal).
    return nullptr;

  SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes));
  unsigned char *CurPtr = RawBytes.data();

  if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL))
    return nullptr;

  return ConstantDataArray::get(GV->getContext(), RawBytes);
}

/// If this Offset points exactly to the start of an aggregate element, return
/// that element, otherwise return nullptr.
Constant *getConstantAtOffset(Constant *Base, APInt Offset,
                              const DataLayout &DL) {
  if (Offset.isZero())
    return Base;

  if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))
    return nullptr;

  Type *ElemTy = Base->getType();
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
  if (!Offset.isZero() || !Indices[0].isZero())
    return nullptr;

  Constant *C = Base;
  for (const APInt &Index : drop_begin(Indices)) {
    if (Index.isNegative() || Index.getActiveBits() >= 32)
      return nullptr;

    C = C->getAggregateElement(Index.getZExtValue());
    if (!C)
      return nullptr;
  }

  return C;
}

Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
                                          const APInt &Offset,
                                          const DataLayout &DL) {
  if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))
    if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))
      return Result;

  // Explicitly check for out-of-bounds access, so we return poison even if the
  // constant is a uniform value.
  TypeSize Size = DL.getTypeAllocSize(C->getType());
  if (!Size.isScalable() && Offset.sge(Size.getFixedValue()))
    return PoisonValue::get(Ty);

  // Try an offset-independent fold of a uniform value.
  if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty))
    return Result;

  // Try hard to fold loads from bitcasted strange and non-type-safe things.
  if (Offset.getMinSignedBits() <= 64)
    if (Constant *Result =
            FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL))
      return Result;

  return nullptr;
}

Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
                                          const DataLayout &DL) {
  return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL);
}

Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
                                             APInt Offset,
                                             const DataLayout &DL) {
  C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
      DL, Offset, /* AllowNonInbounds */ true));

  if (auto *GV = dyn_cast<GlobalVariable>(C))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
                                                       Offset, DL))
        return Result;

  // If this load comes from anywhere in a uniform constant global, the value
  // is always the same, regardless of the loaded offset.
  if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C))) {
    if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
      if (Constant *Res =
              ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty))
        return Res;
    }
  }

  return nullptr;
}

Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
                                             const DataLayout &DL) {
  APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);
  return ConstantFoldLoadFromConstPtr(C, Ty, Offset, DL);
}

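/// Loading from a uniform constant (poison, undef, all-zero, or all-ones)
/// yields the same value at every offset; return that value as type \p Ty
/// where that is legal, or null otherwise.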
Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty) {
  if (isa<PoisonValue>(C))
    return PoisonValue::get(Ty);
  if (isa<UndefValue>(C))
    return UndefValue::get(Ty);
  if (C->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy())
    return Constant::getNullValue(Ty);
  if (C->isAllOnesValue() &&
      (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy()))
    return Constant::getAllOnesValue(Ty);
  return nullptr;
}

namespace {

/// One of Op0/Op1 is a constant expression.
/// Attempt to symbolically evaluate the result of a binary operator merging
/// these together. If target data info is available, it is provided as DL,
/// otherwise DL is null.
Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
                                    const DataLayout &DL) {
  // SROA

  // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
  // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
  // bits.

  if (Opc == Instruction::And) {
    KnownBits Known0 = computeKnownBits(Op0, DL);
    KnownBits Known1 = computeKnownBits(Op1, DL);
    if ((Known1.One | Known0.Zero).isAllOnes()) {
      // All the bits of Op0 that the 'and' could be masking are already zero.
      return Op0;
    }
    if ((Known0.One | Known1.Zero).isAllOnes()) {
      // All the bits of Op1 that the 'and' could be masking are already zero.
      return Op1;
    }

    Known0 &= Known1;
    if (Known0.isConstant())
      return ConstantInt::get(Op0->getType(), Known0.getConstant());
  }

  // If the constant expr is something like &A[123] - &A[4].f, fold this into a
  // constant. This happens frequently when iterating over a global array.
  if (Opc == Instruction::Sub) {
    GlobalValue *GV1, *GV2;
    APInt Offs1, Offs2;

    if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
      if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
        unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());

        // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
        // PtrToInt may change the bitwidth, so we have to convert to the right
        // size first.
        return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
                                                    Offs2.zextOrTrunc(OpSize));
      }
  }

  return nullptr;
}

/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
                         Type *ResultTy, std::optional<unsigned> InRangeIndex,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
  Type *IntIdxTy = DL.getIndexType(ResultTy);
  Type *IntIdxScalarTy = IntIdxTy->getScalarType();

  bool Any = false;
  SmallVector<Constant*, 32> NewIdxs;
  for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
    if ((i == 1 ||
         !isa<StructType>(GetElementPtrInst::getIndexedType(
             SrcElemTy, Ops.slice(1, i - 1)))) &&
        Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
      Any = true;
      Type *NewType = Ops[i]->getType()->isVectorTy()
                          ? IntIdxTy
                          : IntIdxScalarTy;
      NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
                                                                      true,
                                                                      NewType,
                                                                      true),
                                              Ops[i], NewType));
    } else
      NewIdxs.push_back(Ops[i]);
  }

  if (!Any)
    return nullptr;

  Constant *C = ConstantExpr::getGetElementPtr(
      SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex);
  return ConstantFoldConstant(C, DL, TLI);
}

/// Strip the pointer casts, but preserve the address space information.
Constant *StripPtrCastKeepAS(Constant *Ptr) {
  assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
  auto *OldPtrTy = cast<PointerType>(Ptr->getType());
  Ptr = cast<Constant>(Ptr->stripPointerCasts());
  auto *NewPtrTy = cast<PointerType>(Ptr->getType());

  // Preserve the address space number of the pointer.
  if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
    Ptr = ConstantExpr::getPointerCast(
        Ptr, PointerType::getWithSamePointeeType(NewPtrTy,
                                                 OldPtrTy->getAddressSpace()));
  }
  return Ptr;
}

/// If we can symbolically evaluate the GEP constant expression, do so.
Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
                                  ArrayRef<Constant *> Ops,
                                  const DataLayout &DL,
                                  const TargetLibraryInfo *TLI) {
  const GEPOperator *InnermostGEP = GEP;
  bool InBounds = GEP->isInBounds();

  Type *SrcElemTy = GEP->getSourceElementType();
  Type *ResElemTy = GEP->getResultElementType();
  Type *ResTy = GEP->getType();
  if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
    return nullptr;

  if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
                                   GEP->getInRangeIndex(), DL, TLI))
    return C;

  Constant *Ptr = Ops[0];
  if (!Ptr->getType()->isPointerTy())
    return nullptr;

  Type *IntIdxTy = DL.getIndexType(Ptr->getType());

  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
    if (!isa<ConstantInt>(Ops[i]))
      return nullptr;

  unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);
  APInt Offset = APInt(
      BitWidth,
      DL.getIndexedOffsetInType(
          SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)));
  Ptr = StripPtrCastKeepAS(Ptr);

  // If this is a GEP of a GEP, fold it all into a single GEP.
  while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
    InnermostGEP = GEP;
    InBounds &= GEP->isInBounds();

    SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands()));

    // Do not try to incorporate the sub-GEP if some index is not a number.
    bool AllConstantInt = true;
    for (Value *NestedOp : NestedOps)
      if (!isa<ConstantInt>(NestedOp)) {
        AllConstantInt = false;
        break;
      }
    if (!AllConstantInt)
      break;

    Ptr = cast<Constant>(GEP->getOperand(0));
    SrcElemTy = GEP->getSourceElementType();
    Offset += APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps));
    Ptr = StripPtrCastKeepAS(Ptr);
  }

  // If the base value for this address is a literal integer value, fold the
  // getelementptr to the resulting integer value casted to the pointer type.
  APInt BasePtr(BitWidth, 0);
  if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) {
    if (CE->getOpcode() == Instruction::IntToPtr) {
      if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
        BasePtr = Base->getValue().zextOrTrunc(BitWidth);
    }
  }

  auto *PTy = cast<PointerType>(Ptr->getType());
  if ((Ptr->isNullValue() || BasePtr != 0) &&
      !DL.isNonIntegralPointerType(PTy)) {
    Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
    return ConstantExpr::getIntToPtr(C, ResTy);
  }

  // Otherwise form a regular getelementptr. Recompute the indices so that
  // we eliminate over-indexing of the notional static type array bounds.
  // This makes it easy to determine if the getelementptr is "inbounds".
  // Also, this helps GlobalOpt do SROA on GlobalVariables.

  // For GEPs of GlobalValues, use the value type even for opaque pointers.
  // Otherwise use an i8 GEP.
  if (auto *GV = dyn_cast<GlobalValue>(Ptr))
    SrcElemTy = GV->getValueType();
  else if (!PTy->isOpaque())
    SrcElemTy = PTy->getNonOpaquePointerElementType();
  else
    SrcElemTy = Type::getInt8Ty(Ptr->getContext());

  if (!SrcElemTy->isSized())
    return nullptr;

  Type *ElemTy = SrcElemTy;
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
  if (Offset != 0)
    return nullptr;

  // Try to add additional zero indices to reach the desired result element
  // type.
  // TODO: Should we avoid extra zero indices if ResElemTy can't be reached and
  // we'll have to insert a bitcast anyway?
  while (ElemTy != ResElemTy) {
    Type *NextTy = GetElementPtrInst::getTypeAtIndex(ElemTy, (uint64_t)0);
    if (!NextTy)
      break;

    Indices.push_back(APInt::getZero(isa<StructType>(ElemTy) ? 32 : BitWidth));
    ElemTy = NextTy;
  }

  SmallVector<Constant *, 32> NewIdxs;
  for (const APInt &Index : Indices)
    NewIdxs.push_back(ConstantInt::get(
        Type::getIntNTy(Ptr->getContext(), Index.getBitWidth()), Index));

  // Preserve the inrange index from the innermost GEP if possible. We must
  // have calculated the same indices up to and including the inrange index.
  std::optional<unsigned> InRangeIndex;
  if (std::optional<unsigned> LastIRIndex = InnermostGEP->getInRangeIndex())
    if (SrcElemTy == InnermostGEP->getSourceElementType() &&
        NewIdxs.size() > *LastIRIndex) {
      InRangeIndex = LastIRIndex;
      for (unsigned I = 0; I <= *LastIRIndex; ++I)
        if (NewIdxs[I] != InnermostGEP->getOperand(I + 1))
          return nullptr;
    }

  // Create a GEP.
  Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs,
                                               InBounds, InRangeIndex);
  assert(
      cast<PointerType>(C->getType())->isOpaqueOrPointeeTypeMatches(ElemTy) &&
      "Computed GetElementPtr has unexpected type!");

  // If we ended up indexing a member with a type that doesn't match
  // the type of what the original indices indexed, add a cast.
  if (C->getType() != ResTy)
    C = FoldBitCast(C, ResTy, DL);

  return C;
}

/// Attempt to constant fold an instruction with the
/// specified opcode and operands. If successful, the constant result is
/// returned, if not, null is returned. Note that this function can fail when
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
                                       ArrayRef<Constant *> Ops,
                                       const DataLayout &DL,
                                       const TargetLibraryInfo *TLI) {
  Type *DestTy = InstOrCE->getType();

  if (Instruction::isUnaryOp(Opcode))
    return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);

  if (Instruction::isBinaryOp(Opcode)) {
    switch (Opcode) {
    default:
      break;
    case Instruction::FAdd:
    case Instruction::FSub:
    case Instruction::FMul:
    case Instruction::FDiv:
    case Instruction::FRem:
      // Handle floating point instructions separately to account for denormals
      // TODO: If a constant expression is being folded rather than an
      // instruction, denormals will not be flushed/treated as zero
      if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {
        return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I);
      }
    }
    return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
  }

  if (Instruction::isCast(Opcode))
    return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);

  if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
    if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
      return C;

    return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0],
                                          Ops.slice(1), GEP->isInBounds(),
                                          GEP->getInRangeIndex());
  }

  if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE)) {
    if (CE->isCompare())
      return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
                                             DL, TLI);
    return CE->getWithOperands(Ops);
  }

  switch (Opcode) {
  default: return nullptr;
  case Instruction::ICmp:
  case Instruction::FCmp: {
    auto *C = cast<CmpInst>(InstOrCE);
    return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1],
                                           DL, TLI, C);
  }
  case Instruction::Freeze:
    return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
  case Instruction::Call:
    if (auto *F = dyn_cast<Function>(Ops.back())) {
      const auto *Call = cast<CallBase>(InstOrCE);
      if (canConstantFoldCallTo(Call, F))
        return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI);
    }
    return nullptr;
  case Instruction::Select:
    return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
  case Instruction::ExtractElement:
    return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
  case Instruction::ExtractValue:
    return ConstantFoldExtractValueInstruction(
        Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
  case Instruction::InsertElement:
    return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
  case Instruction::InsertValue:
    return ConstantFoldInsertValueInstruction(
        Ops[0], Ops[1], cast<InsertValueInst>(InstOrCE)->getIndices());
  case Instruction::ShuffleVector:
    return ConstantExpr::getShuffleVector(
        Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
  case Instruction::Load: {
    const auto *LI = dyn_cast<LoadInst>(InstOrCE);
    if (LI->isVolatile())
      return nullptr;
    return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
  }
  }
}

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// Constant Folding public APIs
//===----------------------------------------------------------------------===//

namespace {

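/// Recursively fold the operands of \p C, caching already-folded operands in
/// \p FoldedOps, and then fold \p C itself using the folded operands.
/// Returns the original constant if nothing could be simplified.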
  951. Constant *
  952. ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
  953. const TargetLibraryInfo *TLI,
  954. SmallDenseMap<Constant *, Constant *> &FoldedOps) {
  955. if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
  956. return const_cast<Constant *>(C);
  957. SmallVector<Constant *, 8> Ops;
  958. for (const Use &OldU : C->operands()) {
  959. Constant *OldC = cast<Constant>(&OldU);
  960. Constant *NewC = OldC;
  961. // Recursively fold the ConstantExpr's operands. If we have already folded
  962. // a ConstantExpr, we don't have to process it again.
  963. if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {
  964. auto It = FoldedOps.find(OldC);
  965. if (It == FoldedOps.end()) {
  966. NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);
  967. FoldedOps.insert({OldC, NewC});
  968. } else {
  969. NewC = It->second;
  970. }
  971. }
  972. Ops.push_back(NewC);
  973. }
  974. if (auto *CE = dyn_cast<ConstantExpr>(C)) {
  975. if (Constant *Res =
  976. ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI))
  977. return Res;
  978. return const_cast<Constant *>(C);
  979. }
  980. assert(isa<ConstantVector>(C));
  981. return ConstantVector::get(Ops);
  982. }
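// Note on the helper above (illustrative, not an original comment): the
// FoldedOps map memoizes already-folded sub-expressions, so a DAG of constant
// expressions with shared operands is folded once per distinct node rather
// than once per use.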
  983. } // end anonymous namespace
  984. Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
  985. const TargetLibraryInfo *TLI) {
  986. // Handle PHI nodes quickly here...
  987. if (auto *PN = dyn_cast<PHINode>(I)) {
  988. Constant *CommonValue = nullptr;
  989. SmallDenseMap<Constant *, Constant *> FoldedOps;
  990. for (Value *Incoming : PN->incoming_values()) {
  991. // If the incoming value is undef then skip it. Note that while we could
  992. // skip the value if it is equal to the phi node itself we choose not to
  993. // because that would break the rule that constant folding only applies if
  994. // all operands are constants.
  995. if (isa<UndefValue>(Incoming))
  996. continue;
  997. // If the incoming value is not a constant, then give up.
  998. auto *C = dyn_cast<Constant>(Incoming);
  999. if (!C)
  1000. return nullptr;
  1001. // Fold the PHI's operands.
  1002. C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
  1003. // If the incoming value is a different constant to
  1004. // the one we saw previously, then give up.
  1005. if (CommonValue && C != CommonValue)
  1006. return nullptr;
  1007. CommonValue = C;
  1008. }
  1009. // If we reach here, all incoming values are the same constant or undef.
  1010. return CommonValue ? CommonValue : UndefValue::get(PN->getType());
  1011. }
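// Illustrative example for the PHI handling above (assumed, not from the
// original sources): a phi such as
//   %p = phi i32 [ 42, %bb1 ], [ 42, %bb2 ], [ undef, %bb3 ]
// folds to i32 42; if two incoming values were different constants, the
// function would give up and return null instead.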
  1012. // Scan the operand list, checking to see if they are all constants, if so,
  1013. // hand off to ConstantFoldInstOperandsImpl.
  1014. if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); }))
  1015. return nullptr;
  1016. SmallDenseMap<Constant *, Constant *> FoldedOps;
  1017. SmallVector<Constant *, 8> Ops;
  1018. for (const Use &OpU : I->operands()) {
  1019. auto *Op = cast<Constant>(&OpU);
  1020. // Fold the Instruction's operands.
  1021. Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);
  1022. Ops.push_back(Op);
  1023. }
  1024. return ConstantFoldInstOperands(I, Ops, DL, TLI);
  1025. }
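// A minimal usage sketch for ConstantFoldInstruction (an assumed caller shape,
// not code from this file):
//   if (Constant *C = ConstantFoldInstruction(&I, DL, &TLI)) {
//     I.replaceAllUsesWith(C);
//     // The caller is responsible for deleting the now-dead instruction.
//   }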
  1026. Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
  1027. const TargetLibraryInfo *TLI) {
  1028. SmallDenseMap<Constant *, Constant *> FoldedOps;
  1029. return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
  1030. }
  1031. Constant *llvm::ConstantFoldInstOperands(Instruction *I,
  1032. ArrayRef<Constant *> Ops,
  1033. const DataLayout &DL,
  1034. const TargetLibraryInfo *TLI) {
  1035. return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
  1036. }
  1037. Constant *llvm::ConstantFoldCompareInstOperands(
  1038. unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL,
  1039. const TargetLibraryInfo *TLI, const Instruction *I) {
  1040. CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
  1041. // fold: icmp (inttoptr x), null -> icmp x, 0
  1042. // fold: icmp null, (inttoptr x) -> icmp 0, x
  1043. // fold: icmp (ptrtoint x), 0 -> icmp x, null
  1044. // fold: icmp 0, (ptrtoint x) -> icmp null, x
  1045. // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
  1046. // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
  1047. //
1048. // FIXME: The following comment is out of date and the DataLayout is here now.
  1049. // ConstantExpr::getCompare cannot do this, because it doesn't have DL
  1050. // around to know if bit truncation is happening.
  1051. if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
  1052. if (Ops1->isNullValue()) {
  1053. if (CE0->getOpcode() == Instruction::IntToPtr) {
  1054. Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
  1055. // Convert the integer value to the right size to ensure we get the
  1056. // proper extension or truncation.
  1057. Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
  1058. IntPtrTy, false);
  1059. Constant *Null = Constant::getNullValue(C->getType());
  1060. return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
  1061. }
1062. // Only do this transformation if the int has the same width as IntPtrTy;
1063. // otherwise there is a truncation or extension that we aren't modeling.
  1064. if (CE0->getOpcode() == Instruction::PtrToInt) {
  1065. Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
  1066. if (CE0->getType() == IntPtrTy) {
  1067. Constant *C = CE0->getOperand(0);
  1068. Constant *Null = Constant::getNullValue(C->getType());
  1069. return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
  1070. }
  1071. }
  1072. }
  1073. if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
  1074. if (CE0->getOpcode() == CE1->getOpcode()) {
  1075. if (CE0->getOpcode() == Instruction::IntToPtr) {
  1076. Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
  1077. // Convert the integer value to the right size to ensure we get the
  1078. // proper extension or truncation.
  1079. Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
  1080. IntPtrTy, false);
  1081. Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
  1082. IntPtrTy, false);
  1083. return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
  1084. }
1085. // Only do this transformation if the int has the same width as IntPtrTy;
1086. // otherwise there is a truncation or extension that we aren't modeling.
  1087. if (CE0->getOpcode() == Instruction::PtrToInt) {
  1088. Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
  1089. if (CE0->getType() == IntPtrTy &&
  1090. CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
  1091. return ConstantFoldCompareInstOperands(
  1092. Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
  1093. }
  1094. }
  1095. }
  1096. }
  1097. // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
  1098. // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
  1099. if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
  1100. CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
  1101. Constant *LHS = ConstantFoldCompareInstOperands(
  1102. Predicate, CE0->getOperand(0), Ops1, DL, TLI);
  1103. Constant *RHS = ConstantFoldCompareInstOperands(
  1104. Predicate, CE0->getOperand(1), Ops1, DL, TLI);
  1105. unsigned OpC =
  1106. Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
  1107. return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL);
  1108. }
  1109. // Convert pointer comparison (base+offset1) pred (base+offset2) into
  1110. // offset1 pred offset2, for the case where the offset is inbounds. This
  1111. // only works for equality and unsigned comparison, as inbounds permits
  1112. // crossing the sign boundary. However, the offset comparison itself is
  1113. // signed.
  1114. if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) {
  1115. unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType());
  1116. APInt Offset0(IndexWidth, 0);
  1117. Value *Stripped0 =
  1118. Ops0->stripAndAccumulateInBoundsConstantOffsets(DL, Offset0);
  1119. APInt Offset1(IndexWidth, 0);
  1120. Value *Stripped1 =
  1121. Ops1->stripAndAccumulateInBoundsConstantOffsets(DL, Offset1);
  1122. if (Stripped0 == Stripped1)
  1123. return ConstantExpr::getCompare(
  1124. ICmpInst::getSignedPredicate(Predicate),
  1125. ConstantInt::get(CE0->getContext(), Offset0),
  1126. ConstantInt::get(CE0->getContext(), Offset1));
  1127. }
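// Illustrative example for the offset comparison above (assumed, not an
// original comment): with a global @g,
//   icmp ult (gep inbounds (i8, @g, 4)), (gep inbounds (i8, @g, 8))
// strips the common base @g and folds to the signed offset comparison
// "4 slt 8", i.e. true.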
  1128. } else if (isa<ConstantExpr>(Ops1)) {
  1129. // If RHS is a constant expression, but the left side isn't, swap the
  1130. // operands and try again.
  1131. Predicate = ICmpInst::getSwappedPredicate(Predicate);
  1132. return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
  1133. }
  1134. // Flush any denormal constant float input according to denormal handling
  1135. // mode.
  1136. Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false);
  1137. Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false);
  1138. return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
  1139. }
  1140. Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
  1141. const DataLayout &DL) {
  1142. assert(Instruction::isUnaryOp(Opcode));
  1143. return ConstantFoldUnaryInstruction(Opcode, Op);
  1144. }
  1145. Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
  1146. Constant *RHS,
  1147. const DataLayout &DL) {
  1148. assert(Instruction::isBinaryOp(Opcode));
  1149. if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS))
  1150. if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
  1151. return C;
  1152. if (ConstantExpr::isDesirableBinOp(Opcode))
  1153. return ConstantExpr::get(Opcode, LHS, RHS);
  1154. return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);
  1155. }
  1156. Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I,
  1157. bool IsOutput) {
  1158. if (!I || !I->getParent() || !I->getFunction())
  1159. return Operand;
  1160. ConstantFP *CFP = dyn_cast<ConstantFP>(Operand);
  1161. if (!CFP)
  1162. return Operand;
  1163. const APFloat &APF = CFP->getValueAPF();
  1164. Type *Ty = CFP->getType();
  1165. DenormalMode DenormMode =
  1166. I->getFunction()->getDenormalMode(Ty->getFltSemantics());
  1167. DenormalMode::DenormalModeKind Mode =
  1168. IsOutput ? DenormMode.Output : DenormMode.Input;
  1169. switch (Mode) {
  1170. default:
  1171. llvm_unreachable("unknown denormal mode");
  1172. return Operand;
  1173. case DenormalMode::IEEE:
  1174. return Operand;
  1175. case DenormalMode::PreserveSign:
  1176. if (APF.isDenormal()) {
  1177. return ConstantFP::get(
  1178. Ty->getContext(),
  1179. APFloat::getZero(Ty->getFltSemantics(), APF.isNegative()));
  1180. }
  1181. return Operand;
  1182. case DenormalMode::PositiveZero:
  1183. if (APF.isDenormal()) {
  1184. return ConstantFP::get(Ty->getContext(),
  1185. APFloat::getZero(Ty->getFltSemantics(), false));
  1186. }
  1187. return Operand;
  1188. }
  1189. return Operand;
  1190. }
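// Worked example for FlushFPConstant above (illustrative, assuming a float
// input): under "denormal-fp-math"="preserve-sign", the subnormal constant
// 0x1p-149 is replaced by +0.0 and -0x1p-149 by -0.0; under "positive-zero"
// both become +0.0; under the default IEEE mode the operand is returned
// unchanged.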
  1191. Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
  1192. Constant *RHS, const DataLayout &DL,
  1193. const Instruction *I) {
  1194. if (Instruction::isBinaryOp(Opcode)) {
  1195. // Flush denormal inputs if needed.
  1196. Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
  1197. Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false);
  1198. // Calculate constant result.
  1199. Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
  1200. if (!C)
  1201. return nullptr;
  1202. // Flush denormal output if needed.
  1203. return FlushFPConstant(C, I, /* IsOutput */ true);
  1204. }
1205. // If the instruction lacks a parent/function, the denormal mode cannot be
1206. // determined, so use the default (IEEE) behavior.
  1207. return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
  1208. }
  1209. Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
  1210. Type *DestTy, const DataLayout &DL) {
  1211. assert(Instruction::isCast(Opcode));
  1212. switch (Opcode) {
  1213. default:
  1214. llvm_unreachable("Missing case");
  1215. case Instruction::PtrToInt:
  1216. if (auto *CE = dyn_cast<ConstantExpr>(C)) {
  1217. Constant *FoldedValue = nullptr;
  1218. // If the input is a inttoptr, eliminate the pair. This requires knowing
  1219. // the width of a pointer, so it can't be done in ConstantExpr::getCast.
  1220. if (CE->getOpcode() == Instruction::IntToPtr) {
  1221. // zext/trunc the inttoptr to pointer size.
  1222. FoldedValue = ConstantExpr::getIntegerCast(
  1223. CE->getOperand(0), DL.getIntPtrType(CE->getType()),
  1224. /*IsSigned=*/false);
  1225. } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
  1226. // If we have GEP, we can perform the following folds:
  1227. // (ptrtoint (gep null, x)) -> x
  1228. // (ptrtoint (gep (gep null, x), y) -> x + y, etc.
  1229. unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
  1230. APInt BaseOffset(BitWidth, 0);
  1231. auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(
  1232. DL, BaseOffset, /*AllowNonInbounds=*/true));
  1233. if (Base->isNullValue()) {
  1234. FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
  1235. } else {
  1236. // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V
  1237. if (GEP->getNumIndices() == 1 &&
  1238. GEP->getSourceElementType()->isIntegerTy(8)) {
  1239. auto *Ptr = cast<Constant>(GEP->getPointerOperand());
  1240. auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));
  1241. Type *IntIdxTy = DL.getIndexType(Ptr->getType());
  1242. if (Sub && Sub->getType() == IntIdxTy &&
  1243. Sub->getOpcode() == Instruction::Sub &&
  1244. Sub->getOperand(0)->isNullValue())
  1245. FoldedValue = ConstantExpr::getSub(
  1246. ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1));
  1247. }
  1248. }
  1249. }
  1250. if (FoldedValue) {
  1251. // Do a zext or trunc to get to the ptrtoint dest size.
  1252. return ConstantExpr::getIntegerCast(FoldedValue, DestTy,
  1253. /*IsSigned=*/false);
  1254. }
  1255. }
  1256. return ConstantExpr::getCast(Opcode, C, DestTy);
  1257. case Instruction::IntToPtr:
  1258. // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
  1259. // the int size is >= the ptr size and the address spaces are the same.
  1260. // This requires knowing the width of a pointer, so it can't be done in
  1261. // ConstantExpr::getCast.
  1262. if (auto *CE = dyn_cast<ConstantExpr>(C)) {
  1263. if (CE->getOpcode() == Instruction::PtrToInt) {
  1264. Constant *SrcPtr = CE->getOperand(0);
  1265. unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
  1266. unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
  1267. if (MidIntSize >= SrcPtrSize) {
  1268. unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
  1269. if (SrcAS == DestTy->getPointerAddressSpace())
  1270. return FoldBitCast(CE->getOperand(0), DestTy, DL);
  1271. }
  1272. }
  1273. }
  1274. return ConstantExpr::getCast(Opcode, C, DestTy);
  1275. case Instruction::Trunc:
  1276. case Instruction::ZExt:
  1277. case Instruction::SExt:
  1278. case Instruction::FPTrunc:
  1279. case Instruction::FPExt:
  1280. case Instruction::UIToFP:
  1281. case Instruction::SIToFP:
  1282. case Instruction::FPToUI:
  1283. case Instruction::FPToSI:
  1284. case Instruction::AddrSpaceCast:
  1285. return ConstantExpr::getCast(Opcode, C, DestTy);
  1286. case Instruction::BitCast:
  1287. return FoldBitCast(C, DestTy, DL);
  1288. }
  1289. }
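// Illustrative example for the PtrToInt handling above (assumed, not an
// original comment): "ptrtoint (gep i8, null, 16) to i64" accumulates a null
// base plus offset 16 and folds to "i64 16"; if the destination type is
// narrower or wider than the index type, the result is truncated or zero
// extended to fit.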
  1290. //===----------------------------------------------------------------------===//
  1291. // Constant Folding for Calls
  1292. //
  1293. bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
  1294. if (Call->isNoBuiltin())
  1295. return false;
  1296. if (Call->getFunctionType() != F->getFunctionType())
  1297. return false;
  1298. switch (F->getIntrinsicID()) {
1299. // Operations that do not operate on floating-point numbers and do not depend
1300. // on the FP environment can be folded even in strictfp functions.
  1301. case Intrinsic::bswap:
  1302. case Intrinsic::ctpop:
  1303. case Intrinsic::ctlz:
  1304. case Intrinsic::cttz:
  1305. case Intrinsic::fshl:
  1306. case Intrinsic::fshr:
  1307. case Intrinsic::launder_invariant_group:
  1308. case Intrinsic::strip_invariant_group:
  1309. case Intrinsic::masked_load:
  1310. case Intrinsic::get_active_lane_mask:
  1311. case Intrinsic::abs:
  1312. case Intrinsic::smax:
  1313. case Intrinsic::smin:
  1314. case Intrinsic::umax:
  1315. case Intrinsic::umin:
  1316. case Intrinsic::sadd_with_overflow:
  1317. case Intrinsic::uadd_with_overflow:
  1318. case Intrinsic::ssub_with_overflow:
  1319. case Intrinsic::usub_with_overflow:
  1320. case Intrinsic::smul_with_overflow:
  1321. case Intrinsic::umul_with_overflow:
  1322. case Intrinsic::sadd_sat:
  1323. case Intrinsic::uadd_sat:
  1324. case Intrinsic::ssub_sat:
  1325. case Intrinsic::usub_sat:
  1326. case Intrinsic::smul_fix:
  1327. case Intrinsic::smul_fix_sat:
  1328. case Intrinsic::bitreverse:
  1329. case Intrinsic::is_constant:
  1330. case Intrinsic::vector_reduce_add:
  1331. case Intrinsic::vector_reduce_mul:
  1332. case Intrinsic::vector_reduce_and:
  1333. case Intrinsic::vector_reduce_or:
  1334. case Intrinsic::vector_reduce_xor:
  1335. case Intrinsic::vector_reduce_smin:
  1336. case Intrinsic::vector_reduce_smax:
  1337. case Intrinsic::vector_reduce_umin:
  1338. case Intrinsic::vector_reduce_umax:
  1339. // Target intrinsics
  1340. case Intrinsic::amdgcn_perm:
  1341. case Intrinsic::arm_mve_vctp8:
  1342. case Intrinsic::arm_mve_vctp16:
  1343. case Intrinsic::arm_mve_vctp32:
  1344. case Intrinsic::arm_mve_vctp64:
  1345. case Intrinsic::aarch64_sve_convert_from_svbool:
  1346. // WebAssembly float semantics are always known
  1347. case Intrinsic::wasm_trunc_signed:
  1348. case Intrinsic::wasm_trunc_unsigned:
  1349. return true;
1350. // Floating-point operations cannot be folded in strictfp functions in the
1351. // general case. They can be folded if the FP environment is known to the compiler.
  1352. case Intrinsic::minnum:
  1353. case Intrinsic::maxnum:
  1354. case Intrinsic::minimum:
  1355. case Intrinsic::maximum:
  1356. case Intrinsic::log:
  1357. case Intrinsic::log2:
  1358. case Intrinsic::log10:
  1359. case Intrinsic::exp:
  1360. case Intrinsic::exp2:
  1361. case Intrinsic::sqrt:
  1362. case Intrinsic::sin:
  1363. case Intrinsic::cos:
  1364. case Intrinsic::pow:
  1365. case Intrinsic::powi:
  1366. case Intrinsic::fma:
  1367. case Intrinsic::fmuladd:
  1368. case Intrinsic::fptoui_sat:
  1369. case Intrinsic::fptosi_sat:
  1370. case Intrinsic::convert_from_fp16:
  1371. case Intrinsic::convert_to_fp16:
  1372. case Intrinsic::amdgcn_cos:
  1373. case Intrinsic::amdgcn_cubeid:
  1374. case Intrinsic::amdgcn_cubema:
  1375. case Intrinsic::amdgcn_cubesc:
  1376. case Intrinsic::amdgcn_cubetc:
  1377. case Intrinsic::amdgcn_fmul_legacy:
  1378. case Intrinsic::amdgcn_fma_legacy:
  1379. case Intrinsic::amdgcn_fract:
  1380. case Intrinsic::amdgcn_ldexp:
  1381. case Intrinsic::amdgcn_sin:
  1382. // The intrinsics below depend on rounding mode in MXCSR.
  1383. case Intrinsic::x86_sse_cvtss2si:
  1384. case Intrinsic::x86_sse_cvtss2si64:
  1385. case Intrinsic::x86_sse_cvttss2si:
  1386. case Intrinsic::x86_sse_cvttss2si64:
  1387. case Intrinsic::x86_sse2_cvtsd2si:
  1388. case Intrinsic::x86_sse2_cvtsd2si64:
  1389. case Intrinsic::x86_sse2_cvttsd2si:
  1390. case Intrinsic::x86_sse2_cvttsd2si64:
  1391. case Intrinsic::x86_avx512_vcvtss2si32:
  1392. case Intrinsic::x86_avx512_vcvtss2si64:
  1393. case Intrinsic::x86_avx512_cvttss2si:
  1394. case Intrinsic::x86_avx512_cvttss2si64:
  1395. case Intrinsic::x86_avx512_vcvtsd2si32:
  1396. case Intrinsic::x86_avx512_vcvtsd2si64:
  1397. case Intrinsic::x86_avx512_cvttsd2si:
  1398. case Intrinsic::x86_avx512_cvttsd2si64:
  1399. case Intrinsic::x86_avx512_vcvtss2usi32:
  1400. case Intrinsic::x86_avx512_vcvtss2usi64:
  1401. case Intrinsic::x86_avx512_cvttss2usi:
  1402. case Intrinsic::x86_avx512_cvttss2usi64:
  1403. case Intrinsic::x86_avx512_vcvtsd2usi32:
  1404. case Intrinsic::x86_avx512_vcvtsd2usi64:
  1405. case Intrinsic::x86_avx512_cvttsd2usi:
  1406. case Intrinsic::x86_avx512_cvttsd2usi64:
  1407. return !Call->isStrictFP();
1408. // Sign operations are actually bitwise operations; they do not raise
1409. // exceptions even for SNaNs.
  1410. case Intrinsic::fabs:
  1411. case Intrinsic::copysign:
  1412. case Intrinsic::is_fpclass:
1413. // Non-constrained variants of rounding operations assume the default FP
1414. // environment; they can be folded in any case.
  1415. case Intrinsic::ceil:
  1416. case Intrinsic::floor:
  1417. case Intrinsic::round:
  1418. case Intrinsic::roundeven:
  1419. case Intrinsic::trunc:
  1420. case Intrinsic::nearbyint:
  1421. case Intrinsic::rint:
  1422. case Intrinsic::canonicalize:
1423. // Constrained intrinsics can be folded if the FP environment is known
1424. // to the compiler.
  1425. case Intrinsic::experimental_constrained_fma:
  1426. case Intrinsic::experimental_constrained_fmuladd:
  1427. case Intrinsic::experimental_constrained_fadd:
  1428. case Intrinsic::experimental_constrained_fsub:
  1429. case Intrinsic::experimental_constrained_fmul:
  1430. case Intrinsic::experimental_constrained_fdiv:
  1431. case Intrinsic::experimental_constrained_frem:
  1432. case Intrinsic::experimental_constrained_ceil:
  1433. case Intrinsic::experimental_constrained_floor:
  1434. case Intrinsic::experimental_constrained_round:
  1435. case Intrinsic::experimental_constrained_roundeven:
  1436. case Intrinsic::experimental_constrained_trunc:
  1437. case Intrinsic::experimental_constrained_nearbyint:
  1438. case Intrinsic::experimental_constrained_rint:
  1439. case Intrinsic::experimental_constrained_fcmp:
  1440. case Intrinsic::experimental_constrained_fcmps:
  1441. return true;
  1442. default:
  1443. return false;
  1444. case Intrinsic::not_intrinsic: break;
  1445. }
  1446. if (!F->hasName() || Call->isStrictFP())
  1447. return false;
1448. // In these cases, checking the length as well as the content is required. We
1449. // don't want to return true for a name like "cos\0blah", which strcmp would
1450. // consider equal to "cos" even though its length is 8.
  1451. StringRef Name = F->getName();
  1452. switch (Name[0]) {
  1453. default:
  1454. return false;
  1455. case 'a':
  1456. return Name == "acos" || Name == "acosf" ||
  1457. Name == "asin" || Name == "asinf" ||
  1458. Name == "atan" || Name == "atanf" ||
  1459. Name == "atan2" || Name == "atan2f";
  1460. case 'c':
  1461. return Name == "ceil" || Name == "ceilf" ||
  1462. Name == "cos" || Name == "cosf" ||
  1463. Name == "cosh" || Name == "coshf";
  1464. case 'e':
  1465. return Name == "exp" || Name == "expf" ||
  1466. Name == "exp2" || Name == "exp2f";
  1467. case 'f':
  1468. return Name == "fabs" || Name == "fabsf" ||
  1469. Name == "floor" || Name == "floorf" ||
  1470. Name == "fmod" || Name == "fmodf";
  1471. case 'l':
  1472. return Name == "log" || Name == "logf" ||
  1473. Name == "log2" || Name == "log2f" ||
  1474. Name == "log10" || Name == "log10f";
  1475. case 'n':
  1476. return Name == "nearbyint" || Name == "nearbyintf";
  1477. case 'p':
  1478. return Name == "pow" || Name == "powf";
  1479. case 'r':
  1480. return Name == "remainder" || Name == "remainderf" ||
  1481. Name == "rint" || Name == "rintf" ||
  1482. Name == "round" || Name == "roundf";
  1483. case 's':
  1484. return Name == "sin" || Name == "sinf" ||
  1485. Name == "sinh" || Name == "sinhf" ||
  1486. Name == "sqrt" || Name == "sqrtf";
  1487. case 't':
  1488. return Name == "tan" || Name == "tanf" ||
  1489. Name == "tanh" || Name == "tanhf" ||
  1490. Name == "trunc" || Name == "truncf";
  1491. case '_':
  1492. // Check for various function names that get used for the math functions
  1493. // when the header files are preprocessed with the macro
  1494. // __FINITE_MATH_ONLY__ enabled.
  1495. // The '12' here is the length of the shortest name that can match.
  1496. // We need to check the size before looking at Name[1] and Name[2]
  1497. // so we may as well check a limit that will eliminate mismatches.
  1498. if (Name.size() < 12 || Name[1] != '_')
  1499. return false;
  1500. switch (Name[2]) {
  1501. default:
  1502. return false;
  1503. case 'a':
  1504. return Name == "__acos_finite" || Name == "__acosf_finite" ||
  1505. Name == "__asin_finite" || Name == "__asinf_finite" ||
  1506. Name == "__atan2_finite" || Name == "__atan2f_finite";
  1507. case 'c':
  1508. return Name == "__cosh_finite" || Name == "__coshf_finite";
  1509. case 'e':
  1510. return Name == "__exp_finite" || Name == "__expf_finite" ||
  1511. Name == "__exp2_finite" || Name == "__exp2f_finite";
  1512. case 'l':
  1513. return Name == "__log_finite" || Name == "__logf_finite" ||
  1514. Name == "__log10_finite" || Name == "__log10f_finite";
  1515. case 'p':
  1516. return Name == "__pow_finite" || Name == "__powf_finite";
  1517. case 's':
  1518. return Name == "__sinh_finite" || Name == "__sinhf_finite";
  1519. }
  1520. }
  1521. }
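// A minimal sketch of how the two call-folding entry points are typically used
// together (an assumed caller shape, not code from this file):
//   if (Function *F = CB->getCalledFunction())
//     if (canConstantFoldCallTo(CB, F))
//       if (Constant *C = ConstantFoldCall(CB, F, ConstantArgs, &TLI))
//         CB->replaceAllUsesWith(C);
// where ConstantArgs holds the call's already-constant arguments.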
  1522. namespace {
  1523. Constant *GetConstantFoldFPValue(double V, Type *Ty) {
  1524. if (Ty->isHalfTy() || Ty->isFloatTy()) {
  1525. APFloat APF(V);
  1526. bool unused;
  1527. APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
  1528. return ConstantFP::get(Ty->getContext(), APF);
  1529. }
  1530. if (Ty->isDoubleTy())
  1531. return ConstantFP::get(Ty->getContext(), APFloat(V));
  1532. llvm_unreachable("Can only constant fold half/float/double");
  1533. }
  1534. /// Clear the floating-point exception state.
  1535. inline void llvm_fenv_clearexcept() {
  1536. #if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
  1537. feclearexcept(FE_ALL_EXCEPT);
  1538. #endif
  1539. errno = 0;
  1540. }
  1541. /// Test if a floating-point exception was raised.
  1542. inline bool llvm_fenv_testexcept() {
  1543. int errno_val = errno;
  1544. if (errno_val == ERANGE || errno_val == EDOM)
  1545. return true;
  1546. #if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
  1547. if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
  1548. return true;
  1549. #endif
  1550. return false;
  1551. }
  1552. Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
  1553. Type *Ty) {
  1554. llvm_fenv_clearexcept();
  1555. double Result = NativeFP(V.convertToDouble());
  1556. if (llvm_fenv_testexcept()) {
  1557. llvm_fenv_clearexcept();
  1558. return nullptr;
  1559. }
  1560. return GetConstantFoldFPValue(Result, Ty);
  1561. }
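// Illustrative note on ConstantFoldFP above (an observation, not an original
// comment): the call is evaluated with the host libm and the result is
// rejected if errno or any FP exception flag other than inexact was set, so
// e.g. folding log of a negative value is refused rather than baking in a NaN
// that the target's own libm might not produce.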
  1562. Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
  1563. const APFloat &V, const APFloat &W, Type *Ty) {
  1564. llvm_fenv_clearexcept();
  1565. double Result = NativeFP(V.convertToDouble(), W.convertToDouble());
  1566. if (llvm_fenv_testexcept()) {
  1567. llvm_fenv_clearexcept();
  1568. return nullptr;
  1569. }
  1570. return GetConstantFoldFPValue(Result, Ty);
  1571. }
  1572. Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
  1573. FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType());
  1574. if (!VT)
  1575. return nullptr;
  1576. // This isn't strictly necessary, but handle the special/common case of zero:
  1577. // all integer reductions of a zero input produce zero.
  1578. if (isa<ConstantAggregateZero>(Op))
  1579. return ConstantInt::get(VT->getElementType(), 0);
  1580. // This is the same as the underlying binops - poison propagates.
  1581. if (isa<PoisonValue>(Op) || Op->containsPoisonElement())
  1582. return PoisonValue::get(VT->getElementType());
  1583. // TODO: Handle undef.
  1584. if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op))
  1585. return nullptr;
  1586. auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
  1587. if (!EltC)
  1588. return nullptr;
  1589. APInt Acc = EltC->getValue();
  1590. for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) {
  1591. if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
  1592. return nullptr;
  1593. const APInt &X = EltC->getValue();
  1594. switch (IID) {
  1595. case Intrinsic::vector_reduce_add:
  1596. Acc = Acc + X;
  1597. break;
  1598. case Intrinsic::vector_reduce_mul:
  1599. Acc = Acc * X;
  1600. break;
  1601. case Intrinsic::vector_reduce_and:
  1602. Acc = Acc & X;
  1603. break;
  1604. case Intrinsic::vector_reduce_or:
  1605. Acc = Acc | X;
  1606. break;
  1607. case Intrinsic::vector_reduce_xor:
  1608. Acc = Acc ^ X;
  1609. break;
  1610. case Intrinsic::vector_reduce_smin:
  1611. Acc = APIntOps::smin(Acc, X);
  1612. break;
  1613. case Intrinsic::vector_reduce_smax:
  1614. Acc = APIntOps::smax(Acc, X);
  1615. break;
  1616. case Intrinsic::vector_reduce_umin:
  1617. Acc = APIntOps::umin(Acc, X);
  1618. break;
  1619. case Intrinsic::vector_reduce_umax:
  1620. Acc = APIntOps::umax(Acc, X);
  1621. break;
  1622. }
  1623. }
  1624. return ConstantInt::get(Op->getContext(), Acc);
  1625. }
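// Illustrative example for constantFoldVectorReduce above (assumed, not an
// original comment):
//   call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
// folds to i32 10, while any poison element in the input folds the whole
// reduction to poison.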
  1626. /// Attempt to fold an SSE floating point to integer conversion of a constant
  1627. /// floating point. If roundTowardZero is false, the default IEEE rounding is
  1628. /// used (toward nearest, ties to even). This matches the behavior of the
  1629. /// non-truncating SSE instructions in the default rounding mode. The desired
  1630. /// integer type Ty is used to select how many bits are available for the
  1631. /// result. Returns null if the conversion cannot be performed, otherwise
  1632. /// returns the Constant value resulting from the conversion.
  1633. Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
  1634. Type *Ty, bool IsSigned) {
1635. // All of these conversion intrinsics form an integer of at most 64 bits.
  1636. unsigned ResultWidth = Ty->getIntegerBitWidth();
  1637. assert(ResultWidth <= 64 &&
  1638. "Can only constant fold conversions to 64 and 32 bit ints");
  1639. uint64_t UIntVal;
  1640. bool isExact = false;
  1641. APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
  1642. : APFloat::rmNearestTiesToEven;
  1643. APFloat::opStatus status =
  1644. Val.convertToInteger(MutableArrayRef(UIntVal), ResultWidth,
  1645. IsSigned, mode, &isExact);
  1646. if (status != APFloat::opOK &&
  1647. (!roundTowardZero || status != APFloat::opInexact))
  1648. return nullptr;
  1649. return ConstantInt::get(Ty, UIntVal, IsSigned);
  1650. }
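// Illustrative example for ConstantFoldSSEConvertToInt above (assumed, not an
// original comment): for the truncating cvttss2si form (roundTowardZero ==
// true), an input of 3.7f folds to i32 3 because the inexact status is
// tolerated; for the rounding cvtss2si form the same input is not folded,
// since only an exact conversion is accepted there.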
  1651. double getValueAsDouble(ConstantFP *Op) {
  1652. Type *Ty = Op->getType();
  1653. if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
  1654. return Op->getValueAPF().convertToDouble();
  1655. bool unused;
  1656. APFloat APF = Op->getValueAPF();
  1657. APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);
  1658. return APF.convertToDouble();
  1659. }
  1660. static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
  1661. if (auto *CI = dyn_cast<ConstantInt>(Op)) {
  1662. C = &CI->getValue();
  1663. return true;
  1664. }
  1665. if (isa<UndefValue>(Op)) {
  1666. C = nullptr;
  1667. return true;
  1668. }
  1669. return false;
  1670. }
1671. /// Checks if the given intrinsic call, which evaluates to a constant, is
1672. /// allowed to be folded.
  1673. ///
  1674. /// \param CI Constrained intrinsic call.
  1675. /// \param St Exception flags raised during constant evaluation.
  1676. static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
  1677. APFloat::opStatus St) {
  1678. std::optional<RoundingMode> ORM = CI->getRoundingMode();
  1679. std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
  1680. // If the operation does not change exception status flags, it is safe
  1681. // to fold.
  1682. if (St == APFloat::opStatus::opOK)
  1683. return true;
1684. // If evaluation raised an FP exception, the result can depend on the rounding
1685. // mode. If the latter is unknown, folding is not possible.
  1686. if (ORM && *ORM == RoundingMode::Dynamic)
  1687. return false;
1688. // If FP exceptions are ignored, fold the call, even if such an exception is
1689. // raised.
  1690. if (EB && *EB != fp::ExceptionBehavior::ebStrict)
  1691. return true;
1692. // Leave the calculation for runtime so that exception flags are correctly set
1693. // in hardware.
  1694. return false;
  1695. }
  1696. /// Returns the rounding mode that should be used for constant evaluation.
  1697. static RoundingMode
  1698. getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
  1699. std::optional<RoundingMode> ORM = CI->getRoundingMode();
  1700. if (!ORM || *ORM == RoundingMode::Dynamic)
1701. // Even if the rounding mode is unknown, try evaluating the operation.
1702. // If it does not raise an inexact exception, rounding was not applied,
1703. // so the result is exact and does not depend on the rounding mode. Whether
1704. // other FP exceptions are raised does not depend on the rounding mode either.
  1705. return RoundingMode::NearestTiesToEven;
  1706. return *ORM;
  1707. }
  1708. /// Try to constant fold llvm.canonicalize for the given caller and value.
  1709. static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI,
  1710. const APFloat &Src) {
  1711. // Zero, positive and negative, is always OK to fold.
  1712. if (Src.isZero()) {
  1713. // Get a fresh 0, since ppc_fp128 does have non-canonical zeros.
  1714. return ConstantFP::get(
  1715. CI->getContext(),
  1716. APFloat::getZero(Src.getSemantics(), Src.isNegative()));
  1717. }
  1718. if (!Ty->isIEEELikeFPTy())
  1719. return nullptr;
  1720. // Zero is always canonical and the sign must be preserved.
  1721. //
  1722. // Denorms and nans may have special encodings, but it should be OK to fold a
  1723. // totally average number.
  1724. if (Src.isNormal() || Src.isInfinity())
  1725. return ConstantFP::get(CI->getContext(), Src);
  1726. if (Src.isDenormal() && CI->getParent() && CI->getFunction()) {
  1727. DenormalMode DenormMode =
  1728. CI->getFunction()->getDenormalMode(Src.getSemantics());
  1729. if (DenormMode == DenormalMode::getIEEE())
  1730. return nullptr;
  1731. bool IsPositive =
  1732. (!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero ||
  1733. (DenormMode.Output == DenormalMode::PositiveZero &&
  1734. DenormMode.Input == DenormalMode::IEEE));
  1735. return ConstantFP::get(CI->getContext(),
  1736. APFloat::getZero(Src.getSemantics(), !IsPositive));
  1737. }
  1738. return nullptr;
  1739. }
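// Worked example for constantFoldCanonicalize above (illustrative): for an
// IEEE float, canonicalize(2.0) folds to 2.0 and canonicalize(-0.0) to -0.0;
// a subnormal input is only folded when the function's denormal mode is known
// and is not plain IEEE, in which case it becomes a zero whose sign is chosen
// according to that mode.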
  1740. static Constant *ConstantFoldScalarCall1(StringRef Name,
  1741. Intrinsic::ID IntrinsicID,
  1742. Type *Ty,
  1743. ArrayRef<Constant *> Operands,
  1744. const TargetLibraryInfo *TLI,
  1745. const CallBase *Call) {
  1746. assert(Operands.size() == 1 && "Wrong number of operands.");
  1747. if (IntrinsicID == Intrinsic::is_constant) {
  1748. // We know we have a "Constant" argument. But we want to only
  1749. // return true for manifest constants, not those that depend on
  1750. // constants with unknowable values, e.g. GlobalValue or BlockAddress.
  1751. if (Operands[0]->isManifestConstant())
  1752. return ConstantInt::getTrue(Ty->getContext());
  1753. return nullptr;
  1754. }
  1755. if (isa<PoisonValue>(Operands[0])) {
  1756. // TODO: All of these operations should probably propagate poison.
  1757. if (IntrinsicID == Intrinsic::canonicalize)
  1758. return PoisonValue::get(Ty);
  1759. }
  1760. if (isa<UndefValue>(Operands[0])) {
  1761. // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
  1762. // ctpop() is between 0 and bitwidth, pick 0 for undef.
  1763. // fptoui.sat and fptosi.sat can always fold to zero (for a zero input).
  1764. if (IntrinsicID == Intrinsic::cos ||
  1765. IntrinsicID == Intrinsic::ctpop ||
  1766. IntrinsicID == Intrinsic::fptoui_sat ||
  1767. IntrinsicID == Intrinsic::fptosi_sat ||
  1768. IntrinsicID == Intrinsic::canonicalize)
  1769. return Constant::getNullValue(Ty);
  1770. if (IntrinsicID == Intrinsic::bswap ||
  1771. IntrinsicID == Intrinsic::bitreverse ||
  1772. IntrinsicID == Intrinsic::launder_invariant_group ||
  1773. IntrinsicID == Intrinsic::strip_invariant_group)
  1774. return Operands[0];
  1775. }
  1776. if (isa<ConstantPointerNull>(Operands[0])) {
  1777. // launder(null) == null == strip(null) iff in addrspace 0
  1778. if (IntrinsicID == Intrinsic::launder_invariant_group ||
  1779. IntrinsicID == Intrinsic::strip_invariant_group) {
  1780. // If instruction is not yet put in a basic block (e.g. when cloning
  1781. // a function during inlining), Call's caller may not be available.
  1782. // So check Call's BB first before querying Call->getCaller.
  1783. const Function *Caller =
  1784. Call->getParent() ? Call->getCaller() : nullptr;
  1785. if (Caller &&
  1786. !NullPointerIsDefined(
  1787. Caller, Operands[0]->getType()->getPointerAddressSpace())) {
  1788. return Operands[0];
  1789. }
  1790. return nullptr;
  1791. }
  1792. }
  1793. if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
  1794. if (IntrinsicID == Intrinsic::convert_to_fp16) {
  1795. APFloat Val(Op->getValueAPF());
  1796. bool lost = false;
  1797. Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
  1798. return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
  1799. }
  1800. APFloat U = Op->getValueAPF();
  1801. if (IntrinsicID == Intrinsic::wasm_trunc_signed ||
  1802. IntrinsicID == Intrinsic::wasm_trunc_unsigned) {
  1803. bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed;
  1804. if (U.isNaN())
  1805. return nullptr;
  1806. unsigned Width = Ty->getIntegerBitWidth();
  1807. APSInt Int(Width, !Signed);
  1808. bool IsExact = false;
  1809. APFloat::opStatus Status =
  1810. U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
  1811. if (Status == APFloat::opOK || Status == APFloat::opInexact)
  1812. return ConstantInt::get(Ty, Int);
  1813. return nullptr;
  1814. }
  1815. if (IntrinsicID == Intrinsic::fptoui_sat ||
  1816. IntrinsicID == Intrinsic::fptosi_sat) {
  1817. // convertToInteger() already has the desired saturation semantics.
  1818. APSInt Int(Ty->getIntegerBitWidth(),
  1819. IntrinsicID == Intrinsic::fptoui_sat);
  1820. bool IsExact;
  1821. U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
  1822. return ConstantInt::get(Ty, Int);
  1823. }
  1824. if (IntrinsicID == Intrinsic::canonicalize)
  1825. return constantFoldCanonicalize(Ty, Call, U);
  1826. if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
  1827. return nullptr;
  1828. // Use internal versions of these intrinsics.
  1829. if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
  1830. U.roundToIntegral(APFloat::rmNearestTiesToEven);
  1831. return ConstantFP::get(Ty->getContext(), U);
  1832. }
  1833. if (IntrinsicID == Intrinsic::round) {
  1834. U.roundToIntegral(APFloat::rmNearestTiesToAway);
  1835. return ConstantFP::get(Ty->getContext(), U);
  1836. }
  1837. if (IntrinsicID == Intrinsic::roundeven) {
  1838. U.roundToIntegral(APFloat::rmNearestTiesToEven);
  1839. return ConstantFP::get(Ty->getContext(), U);
  1840. }
  1841. if (IntrinsicID == Intrinsic::ceil) {
  1842. U.roundToIntegral(APFloat::rmTowardPositive);
  1843. return ConstantFP::get(Ty->getContext(), U);
  1844. }
  1845. if (IntrinsicID == Intrinsic::floor) {
  1846. U.roundToIntegral(APFloat::rmTowardNegative);
  1847. return ConstantFP::get(Ty->getContext(), U);
  1848. }
  1849. if (IntrinsicID == Intrinsic::trunc) {
  1850. U.roundToIntegral(APFloat::rmTowardZero);
  1851. return ConstantFP::get(Ty->getContext(), U);
  1852. }
  1853. if (IntrinsicID == Intrinsic::fabs) {
  1854. U.clearSign();
  1855. return ConstantFP::get(Ty->getContext(), U);
  1856. }
  1857. if (IntrinsicID == Intrinsic::amdgcn_fract) {
  1858. // The v_fract instruction behaves like the OpenCL spec, which defines
  1859. // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is
  1860. // there to prevent fract(-small) from returning 1.0. It returns the
  1861. // largest positive floating-point number less than 1.0."
  1862. APFloat FloorU(U);
  1863. FloorU.roundToIntegral(APFloat::rmTowardNegative);
  1864. APFloat FractU(U - FloorU);
  1865. APFloat AlmostOne(U.getSemantics(), 1);
  1866. AlmostOne.next(/*nextDown*/ true);
  1867. return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));
  1868. }
  1869. // Rounding operations (floor, trunc, ceil, round and nearbyint) do not
  1870. // raise FP exceptions, unless the argument is signaling NaN.
  1871. std::optional<APFloat::roundingMode> RM;
  1872. switch (IntrinsicID) {
  1873. default:
  1874. break;
  1875. case Intrinsic::experimental_constrained_nearbyint:
  1876. case Intrinsic::experimental_constrained_rint: {
  1877. auto CI = cast<ConstrainedFPIntrinsic>(Call);
  1878. RM = CI->getRoundingMode();
  1879. if (!RM || *RM == RoundingMode::Dynamic)
  1880. return nullptr;
  1881. break;
  1882. }
  1883. case Intrinsic::experimental_constrained_round:
  1884. RM = APFloat::rmNearestTiesToAway;
  1885. break;
  1886. case Intrinsic::experimental_constrained_ceil:
  1887. RM = APFloat::rmTowardPositive;
  1888. break;
  1889. case Intrinsic::experimental_constrained_floor:
  1890. RM = APFloat::rmTowardNegative;
  1891. break;
  1892. case Intrinsic::experimental_constrained_trunc:
  1893. RM = APFloat::rmTowardZero;
  1894. break;
  1895. }
  1896. if (RM) {
  1897. auto CI = cast<ConstrainedFPIntrinsic>(Call);
  1898. if (U.isFinite()) {
  1899. APFloat::opStatus St = U.roundToIntegral(*RM);
  1900. if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
  1901. St == APFloat::opInexact) {
  1902. std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
  1903. if (EB && *EB == fp::ebStrict)
  1904. return nullptr;
  1905. }
  1906. } else if (U.isSignaling()) {
  1907. std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
  1908. if (EB && *EB != fp::ebIgnore)
  1909. return nullptr;
  1910. U = APFloat::getQNaN(U.getSemantics());
  1911. }
  1912. return ConstantFP::get(Ty->getContext(), U);
  1913. }
  1914. /// We only fold functions with finite arguments. Folding NaN and inf is
  1915. /// likely to be aborted with an exception anyway, and some host libms
  1916. /// have known errors raising exceptions.
  1917. if (!U.isFinite())
  1918. return nullptr;
1919. /// Currently APFloat versions of these functions do not exist, so we use
1920. /// the host native double versions. Float versions are not called
1921. /// directly, but for all of these it holds that (float)(f((double)arg)) ==
1922. /// f(arg). Long double is not supported yet.
  1923. const APFloat &APF = Op->getValueAPF();
  1924. switch (IntrinsicID) {
  1925. default: break;
  1926. case Intrinsic::log:
  1927. return ConstantFoldFP(log, APF, Ty);
  1928. case Intrinsic::log2:
  1929. // TODO: What about hosts that lack a C99 library?
  1930. return ConstantFoldFP(log2, APF, Ty);
  1931. case Intrinsic::log10:
  1932. // TODO: What about hosts that lack a C99 library?
  1933. return ConstantFoldFP(log10, APF, Ty);
  1934. case Intrinsic::exp:
  1935. return ConstantFoldFP(exp, APF, Ty);
  1936. case Intrinsic::exp2:
  1937. // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
  1938. return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
  1939. case Intrinsic::sin:
  1940. return ConstantFoldFP(sin, APF, Ty);
  1941. case Intrinsic::cos:
  1942. return ConstantFoldFP(cos, APF, Ty);
  1943. case Intrinsic::sqrt:
  1944. return ConstantFoldFP(sqrt, APF, Ty);
  1945. case Intrinsic::amdgcn_cos:
  1946. case Intrinsic::amdgcn_sin: {
  1947. double V = getValueAsDouble(Op);
  1948. if (V < -256.0 || V > 256.0)
  1949. // The gfx8 and gfx9 architectures handle arguments outside the range
  1950. // [-256, 256] differently. This should be a rare case so bail out
  1951. // rather than trying to handle the difference.
  1952. return nullptr;
  1953. bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos;
  1954. double V4 = V * 4.0;
  1955. if (V4 == floor(V4)) {
  1956. // Force exact results for quarter-integer inputs.
  1957. const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 };
  1958. V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];
  1959. } else {
  1960. if (IsCos)
  1961. V = cos(V * 2.0 * numbers::pi);
  1962. else
  1963. V = sin(V * 2.0 * numbers::pi);
  1964. }
  1965. return GetConstantFoldFPValue(V, Ty);
  1966. }
  1967. }
  1968. if (!TLI)
  1969. return nullptr;
  1970. LibFunc Func = NotLibFunc;
  1971. if (!TLI->getLibFunc(Name, Func))
  1972. return nullptr;
  1973. switch (Func) {
  1974. default:
  1975. break;
  1976. case LibFunc_acos:
  1977. case LibFunc_acosf:
  1978. case LibFunc_acos_finite:
  1979. case LibFunc_acosf_finite:
  1980. if (TLI->has(Func))
  1981. return ConstantFoldFP(acos, APF, Ty);
  1982. break;
  1983. case LibFunc_asin:
  1984. case LibFunc_asinf:
  1985. case LibFunc_asin_finite:
  1986. case LibFunc_asinf_finite:
  1987. if (TLI->has(Func))
  1988. return ConstantFoldFP(asin, APF, Ty);
  1989. break;
  1990. case LibFunc_atan:
  1991. case LibFunc_atanf:
  1992. if (TLI->has(Func))
  1993. return ConstantFoldFP(atan, APF, Ty);
  1994. break;
  1995. case LibFunc_ceil:
  1996. case LibFunc_ceilf:
  1997. if (TLI->has(Func)) {
  1998. U.roundToIntegral(APFloat::rmTowardPositive);
  1999. return ConstantFP::get(Ty->getContext(), U);
  2000. }
  2001. break;
  2002. case LibFunc_cos:
  2003. case LibFunc_cosf:
  2004. if (TLI->has(Func))
  2005. return ConstantFoldFP(cos, APF, Ty);
  2006. break;
  2007. case LibFunc_cosh:
  2008. case LibFunc_coshf:
  2009. case LibFunc_cosh_finite:
  2010. case LibFunc_coshf_finite:
  2011. if (TLI->has(Func))
  2012. return ConstantFoldFP(cosh, APF, Ty);
  2013. break;
  2014. case LibFunc_exp:
  2015. case LibFunc_expf:
  2016. case LibFunc_exp_finite:
  2017. case LibFunc_expf_finite:
  2018. if (TLI->has(Func))
  2019. return ConstantFoldFP(exp, APF, Ty);
  2020. break;
  2021. case LibFunc_exp2:
  2022. case LibFunc_exp2f:
  2023. case LibFunc_exp2_finite:
  2024. case LibFunc_exp2f_finite:
  2025. if (TLI->has(Func))
  2026. // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
  2027. return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
  2028. break;
  2029. case LibFunc_fabs:
  2030. case LibFunc_fabsf:
  2031. if (TLI->has(Func)) {
  2032. U.clearSign();
  2033. return ConstantFP::get(Ty->getContext(), U);
  2034. }
  2035. break;
  2036. case LibFunc_floor:
  2037. case LibFunc_floorf:
  2038. if (TLI->has(Func)) {
  2039. U.roundToIntegral(APFloat::rmTowardNegative);
  2040. return ConstantFP::get(Ty->getContext(), U);
  2041. }
  2042. break;
  2043. case LibFunc_log:
  2044. case LibFunc_logf:
  2045. case LibFunc_log_finite:
  2046. case LibFunc_logf_finite:
  2047. if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
  2048. return ConstantFoldFP(log, APF, Ty);
  2049. break;
  2050. case LibFunc_log2:
  2051. case LibFunc_log2f:
  2052. case LibFunc_log2_finite:
  2053. case LibFunc_log2f_finite:
  2054. if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
  2055. // TODO: What about hosts that lack a C99 library?
  2056. return ConstantFoldFP(log2, APF, Ty);
  2057. break;
  2058. case LibFunc_log10:
  2059. case LibFunc_log10f:
  2060. case LibFunc_log10_finite:
  2061. case LibFunc_log10f_finite:
  2062. if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
  2063. // TODO: What about hosts that lack a C99 library?
  2064. return ConstantFoldFP(log10, APF, Ty);
  2065. break;
  2066. case LibFunc_nearbyint:
  2067. case LibFunc_nearbyintf:
  2068. case LibFunc_rint:
  2069. case LibFunc_rintf:
  2070. if (TLI->has(Func)) {
  2071. U.roundToIntegral(APFloat::rmNearestTiesToEven);
  2072. return ConstantFP::get(Ty->getContext(), U);
  2073. }
  2074. break;
  2075. case LibFunc_round:
  2076. case LibFunc_roundf:
  2077. if (TLI->has(Func)) {
  2078. U.roundToIntegral(APFloat::rmNearestTiesToAway);
  2079. return ConstantFP::get(Ty->getContext(), U);
  2080. }
  2081. break;
  2082. case LibFunc_sin:
  2083. case LibFunc_sinf:
  2084. if (TLI->has(Func))
  2085. return ConstantFoldFP(sin, APF, Ty);
  2086. break;
  2087. case LibFunc_sinh:
  2088. case LibFunc_sinhf:
  2089. case LibFunc_sinh_finite:
  2090. case LibFunc_sinhf_finite:
  2091. if (TLI->has(Func))
  2092. return ConstantFoldFP(sinh, APF, Ty);
  2093. break;
  2094. case LibFunc_sqrt:
  2095. case LibFunc_sqrtf:
  2096. if (!APF.isNegative() && TLI->has(Func))
  2097. return ConstantFoldFP(sqrt, APF, Ty);
  2098. break;
  2099. case LibFunc_tan:
  2100. case LibFunc_tanf:
  2101. if (TLI->has(Func))
  2102. return ConstantFoldFP(tan, APF, Ty);
  2103. break;
  2104. case LibFunc_tanh:
  2105. case LibFunc_tanhf:
  2106. if (TLI->has(Func))
  2107. return ConstantFoldFP(tanh, APF, Ty);
  2108. break;
  2109. case LibFunc_trunc:
  2110. case LibFunc_truncf:
  2111. if (TLI->has(Func)) {
  2112. U.roundToIntegral(APFloat::rmTowardZero);
  2113. return ConstantFP::get(Ty->getContext(), U);
  2114. }
  2115. break;
  2116. }
  2117. return nullptr;
  2118. }
  2119. if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
  2120. switch (IntrinsicID) {
  2121. case Intrinsic::bswap:
  2122. return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
  2123. case Intrinsic::ctpop:
  2124. return ConstantInt::get(Ty, Op->getValue().countPopulation());
  2125. case Intrinsic::bitreverse:
  2126. return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
  2127. case Intrinsic::convert_from_fp16: {
  2128. APFloat Val(APFloat::IEEEhalf(), Op->getValue());
  2129. bool lost = false;
  2130. APFloat::opStatus status = Val.convert(
  2131. Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
  2132. // Conversion is always precise.
  2133. (void)status;
  2134. assert(status != APFloat::opInexact && !lost &&
  2135. "Precision lost during fp16 constfolding");
  2136. return ConstantFP::get(Ty->getContext(), Val);
  2137. }
  2138. default:
  2139. return nullptr;
  2140. }
  2141. }
  2142. switch (IntrinsicID) {
  2143. default: break;
  2144. case Intrinsic::vector_reduce_add:
  2145. case Intrinsic::vector_reduce_mul:
  2146. case Intrinsic::vector_reduce_and:
  2147. case Intrinsic::vector_reduce_or:
  2148. case Intrinsic::vector_reduce_xor:
  2149. case Intrinsic::vector_reduce_smin:
  2150. case Intrinsic::vector_reduce_smax:
  2151. case Intrinsic::vector_reduce_umin:
  2152. case Intrinsic::vector_reduce_umax:
  2153. if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0]))
  2154. return C;
  2155. break;
  2156. }
2157. // Handle ConstantVector too, since that form is used when the vector contains an undef element.
  2158. if (isa<ConstantVector>(Operands[0]) ||
  2159. isa<ConstantDataVector>(Operands[0])) {
  2160. auto *Op = cast<Constant>(Operands[0]);
  2161. switch (IntrinsicID) {
  2162. default: break;
  2163. case Intrinsic::x86_sse_cvtss2si:
  2164. case Intrinsic::x86_sse_cvtss2si64:
  2165. case Intrinsic::x86_sse2_cvtsd2si:
  2166. case Intrinsic::x86_sse2_cvtsd2si64:
  2167. if (ConstantFP *FPOp =
  2168. dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
  2169. return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
  2170. /*roundTowardZero=*/false, Ty,
  2171. /*IsSigned*/true);
  2172. break;
  2173. case Intrinsic::x86_sse_cvttss2si:
  2174. case Intrinsic::x86_sse_cvttss2si64:
  2175. case Intrinsic::x86_sse2_cvttsd2si:
  2176. case Intrinsic::x86_sse2_cvttsd2si64:
  2177. if (ConstantFP *FPOp =
  2178. dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
  2179. return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
  2180. /*roundTowardZero=*/true, Ty,
  2181. /*IsSigned*/true);
  2182. break;
  2183. }
  2184. }
  2185. return nullptr;
  2186. }
  2187. static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2,
  2188. const ConstrainedFPIntrinsic *Call) {
  2189. APFloat::opStatus St = APFloat::opOK;
  2190. auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Call);
  2191. FCmpInst::Predicate Cond = FCmp->getPredicate();
  2192. if (FCmp->isSignaling()) {
  2193. if (Op1.isNaN() || Op2.isNaN())
  2194. St = APFloat::opInvalidOp;
  2195. } else {
  2196. if (Op1.isSignaling() || Op2.isSignaling())
  2197. St = APFloat::opInvalidOp;
  2198. }
  2199. bool Result = FCmpInst::compare(Op1, Op2, Cond);
  2200. if (mayFoldConstrained(const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St))
  2201. return ConstantInt::get(Call->getType()->getScalarType(), Result);
  2202. return nullptr;
  2203. }
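// Illustrative example for evaluateCompare above (assumed, not an original
// comment): folding
//   call i1 @llvm.experimental.constrained.fcmps.f64(double 1.0, double 2.0,
//                                                    metadata !"olt",
//                                                    metadata !"fpexcept.strict")
// evaluates 1.0 < 2.0 and yields i1 true; with a NaN operand the signaling
// compare records an invalid-operation status and, under fpexcept.strict,
// folding is refused.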
  2204. static Constant *ConstantFoldScalarCall2(StringRef Name,
  2205. Intrinsic::ID IntrinsicID,
  2206. Type *Ty,
  2207. ArrayRef<Constant *> Operands,
  2208. const TargetLibraryInfo *TLI,
  2209. const CallBase *Call) {
  2210. assert(Operands.size() == 2 && "Wrong number of operands.");
  2211. if (Ty->isFloatingPointTy()) {
  2212. // TODO: We should have undef handling for all of the FP intrinsics that
  2213. // are attempted to be folded in this function.
  2214. bool IsOp0Undef = isa<UndefValue>(Operands[0]);
  2215. bool IsOp1Undef = isa<UndefValue>(Operands[1]);
  2216. switch (IntrinsicID) {
  2217. case Intrinsic::maxnum:
  2218. case Intrinsic::minnum:
  2219. case Intrinsic::maximum:
  2220. case Intrinsic::minimum:
  2221. // If one argument is undef, return the other argument.
  2222. if (IsOp0Undef)
  2223. return Operands[1];
  2224. if (IsOp1Undef)
  2225. return Operands[0];
  2226. break;
  2227. }
  2228. }
  2229. if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
  2230. const APFloat &Op1V = Op1->getValueAPF();
  2231. if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
  2232. if (Op2->getType() != Op1->getType())
  2233. return nullptr;
  2234. const APFloat &Op2V = Op2->getValueAPF();
  2235. if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
  2236. RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
  2237. APFloat Res = Op1V;
  2238. APFloat::opStatus St;
  2239. switch (IntrinsicID) {
  2240. default:
  2241. return nullptr;
  2242. case Intrinsic::experimental_constrained_fadd:
  2243. St = Res.add(Op2V, RM);
  2244. break;
  2245. case Intrinsic::experimental_constrained_fsub:
  2246. St = Res.subtract(Op2V, RM);
  2247. break;
  2248. case Intrinsic::experimental_constrained_fmul:
  2249. St = Res.multiply(Op2V, RM);
  2250. break;
  2251. case Intrinsic::experimental_constrained_fdiv:
  2252. St = Res.divide(Op2V, RM);
  2253. break;
  2254. case Intrinsic::experimental_constrained_frem:
  2255. St = Res.mod(Op2V);
  2256. break;
  2257. case Intrinsic::experimental_constrained_fcmp:
  2258. case Intrinsic::experimental_constrained_fcmps:
  2259. return evaluateCompare(Op1V, Op2V, ConstrIntr);
  2260. }
  2261. if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
  2262. St))
  2263. return ConstantFP::get(Ty->getContext(), Res);
  2264. return nullptr;
  2265. }
  2266. switch (IntrinsicID) {
  2267. default:
  2268. break;
  2269. case Intrinsic::copysign:
  2270. return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V));
  2271. case Intrinsic::minnum:
  2272. return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V));
  2273. case Intrinsic::maxnum:
  2274. return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V));
  2275. case Intrinsic::minimum:
  2276. return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));
  2277. case Intrinsic::maximum:
  2278. return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));
  2279. }
  2280. if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
  2281. return nullptr;
  2282. switch (IntrinsicID) {
  2283. default:
  2284. break;
  2285. case Intrinsic::pow:
  2286. return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
  2287. case Intrinsic::amdgcn_fmul_legacy:
  2288. // The legacy behaviour is that multiplying +/- 0.0 by anything, even
  2289. // NaN or infinity, gives +0.0.
  2290. if (Op1V.isZero() || Op2V.isZero())
  2291. return ConstantFP::getNullValue(Ty);
  2292. return ConstantFP::get(Ty->getContext(), Op1V * Op2V);
  2293. }
  2294. if (!TLI)
  2295. return nullptr;
  2296. LibFunc Func = NotLibFunc;
  2297. if (!TLI->getLibFunc(Name, Func))
  2298. return nullptr;
  2299. switch (Func) {
  2300. default:
  2301. break;
  2302. case LibFunc_pow:
  2303. case LibFunc_powf:
  2304. case LibFunc_pow_finite:
  2305. case LibFunc_powf_finite:
  2306. if (TLI->has(Func))
  2307. return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
  2308. break;
  2309. case LibFunc_fmod:
  2310. case LibFunc_fmodf:
  2311. if (TLI->has(Func)) {
  2312. APFloat V = Op1->getValueAPF();
  2313. if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
  2314. return ConstantFP::get(Ty->getContext(), V);
  2315. }
  2316. break;
  2317. case LibFunc_remainder:
  2318. case LibFunc_remainderf:
  2319. if (TLI->has(Func)) {
  2320. APFloat V = Op1->getValueAPF();
  2321. if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))
  2322. return ConstantFP::get(Ty->getContext(), V);
  2323. }
  2324. break;
  2325. case LibFunc_atan2:
  2326. case LibFunc_atan2f:
2327. // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm
2328. // implementations (e.g. Solaris), so we do not assume a known result for that.
  2329. if (Op1V.isZero() && Op2V.isZero())
  2330. return nullptr;
  2331. [[fallthrough]];
  2332. case LibFunc_atan2_finite:
  2333. case LibFunc_atan2f_finite:
  2334. if (TLI->has(Func))
  2335. return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
  2336. break;
  2337. }
  2338. } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
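      // The first operand is a floating-point constant and the second is an
      // integer constant: handle is_fpclass, powi and amdgcn_ldexp.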
      switch (IntrinsicID) {
      case Intrinsic::is_fpclass: {
        uint32_t Mask = Op2C->getZExtValue();
        bool Result =
          ((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) ||
          ((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) ||
          ((Mask & fcNegInf) && Op1V.isInfinity() && Op1V.isNegative()) ||
          ((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) ||
          ((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) ||
          ((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) ||
          ((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) ||
          ((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) ||
          ((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) ||
          ((Mask & fcPosInf) && Op1V.isInfinity() && !Op1V.isNegative());
        return ConstantInt::get(Ty, Result);
      }
      default:
        break;
      }

      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
        return nullptr;
      if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
        return ConstantFP::get(
            Ty->getContext(),
            APFloat((float)std::pow((float)Op1V.convertToDouble(),
                                    (int)Op2C->getZExtValue())));
      if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
        return ConstantFP::get(
            Ty->getContext(),
            APFloat((float)std::pow((float)Op1V.convertToDouble(),
                                    (int)Op2C->getZExtValue())));
      if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
        return ConstantFP::get(
            Ty->getContext(),
            APFloat((double)std::pow(Op1V.convertToDouble(),
                                     (int)Op2C->getZExtValue())));

      if (IntrinsicID == Intrinsic::amdgcn_ldexp) {
        // FIXME: Should flush denorms depending on FP mode, but that's ignored
        // everywhere else.

        // scalbn is equivalent to ldexp with float radix 2
        APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(),
                                APFloat::rmNearestTiesToEven);
        return ConstantFP::get(Ty->getContext(), Result);
      }
    }
    return nullptr;
  }
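
  // Fold integer intrinsics when both operands are integer constants or
  // undef: smin/smax/umin/umax, the *_with_overflow intrinsics, saturating
  // add/sub, cttz/ctlz and abs.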
  if (Operands[0]->getType()->isIntegerTy() &&
      Operands[1]->getType()->isIntegerTy()) {
    const APInt *C0, *C1;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1))
      return nullptr;

    switch (IntrinsicID) {
    default: break;
    case Intrinsic::smax:
    case Intrinsic::smin:
    case Intrinsic::umax:
    case Intrinsic::umin:
      // This is the same as for binary ops - poison propagates.
      // TODO: Poison handling should be consolidated.
      if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
        return PoisonValue::get(Ty);

      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty);
      return ConstantInt::get(
          Ty, ICmpInst::compare(*C0, *C1,
                                MinMaxIntrinsic::getPredicate(IntrinsicID))
                  ? *C0
                  : *C1);

    case Intrinsic::usub_with_overflow:
    case Intrinsic::ssub_with_overflow:
      // X - undef -> { 0, false }
      // undef - X -> { 0, false }
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);
      [[fallthrough]];
    case Intrinsic::uadd_with_overflow:
    case Intrinsic::sadd_with_overflow:
      // X + undef -> { -1, false }
      // undef + x -> { -1, false }
      if (!C0 || !C1) {
        return ConstantStruct::get(
            cast<StructType>(Ty),
            {Constant::getAllOnesValue(Ty->getStructElementType(0)),
             Constant::getNullValue(Ty->getStructElementType(1))});
      }
      [[fallthrough]];
    case Intrinsic::smul_with_overflow:
    case Intrinsic::umul_with_overflow: {
      // undef * X -> { 0, false }
      // X * undef -> { 0, false }
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);

      APInt Res;
      bool Overflow;
      switch (IntrinsicID) {
      default: llvm_unreachable("Invalid case");
      case Intrinsic::sadd_with_overflow:
        Res = C0->sadd_ov(*C1, Overflow);
        break;
      case Intrinsic::uadd_with_overflow:
        Res = C0->uadd_ov(*C1, Overflow);
        break;
      case Intrinsic::ssub_with_overflow:
        Res = C0->ssub_ov(*C1, Overflow);
        break;
      case Intrinsic::usub_with_overflow:
        Res = C0->usub_ov(*C1, Overflow);
        break;
      case Intrinsic::smul_with_overflow:
        Res = C0->smul_ov(*C1, Overflow);
        break;
      case Intrinsic::umul_with_overflow:
        Res = C0->umul_ov(*C1, Overflow);
        break;
      }
      Constant *Ops[] = {
        ConstantInt::get(Ty->getContext(), Res),
        ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
      };
      return ConstantStruct::get(cast<StructType>(Ty), Ops);
    }
    case Intrinsic::uadd_sat:
    case Intrinsic::sadd_sat:
      // This is the same as for binary ops - poison propagates.
      // TODO: Poison handling should be consolidated.
      if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
        return PoisonValue::get(Ty);

      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return Constant::getAllOnesValue(Ty);
      if (IntrinsicID == Intrinsic::uadd_sat)
        return ConstantInt::get(Ty, C0->uadd_sat(*C1));
      else
        return ConstantInt::get(Ty, C0->sadd_sat(*C1));
    case Intrinsic::usub_sat:
    case Intrinsic::ssub_sat:
      // This is the same as for binary ops - poison propagates.
      // TODO: Poison handling should be consolidated.
      if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
        return PoisonValue::get(Ty);

      if (!C0 && !C1)
        return UndefValue::get(Ty);
      if (!C0 || !C1)
        return Constant::getNullValue(Ty);
      if (IntrinsicID == Intrinsic::usub_sat)
        return ConstantInt::get(Ty, C0->usub_sat(*C1));
      else
        return ConstantInt::get(Ty, C0->ssub_sat(*C1));
    case Intrinsic::cttz:
    case Intrinsic::ctlz:
      assert(C1 && "Must be constant int");

      // cttz(0, 1) and ctlz(0, 1) are poison.
      if (C1->isOne() && (!C0 || C0->isZero()))
        return PoisonValue::get(Ty);
      if (!C0)
        return Constant::getNullValue(Ty);
      if (IntrinsicID == Intrinsic::cttz)
        return ConstantInt::get(Ty, C0->countTrailingZeros());
      else
        return ConstantInt::get(Ty, C0->countLeadingZeros());

    case Intrinsic::abs:
      assert(C1 && "Must be constant int");
      assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1");

      // Undef or minimum val operand with poison min --> undef
      if (C1->isOne() && (!C0 || C0->isMinSignedValue()))
        return UndefValue::get(Ty);

      // Undef operand with no poison min --> 0 (sign bit must be clear)
      if (!C0)
        return Constant::getNullValue(Ty);

      return ConstantInt::get(Ty, C0->abs());
    }

    return nullptr;
  }
  // Support ConstantVector in case we have an Undef in the top.
  if ((isa<ConstantVector>(Operands[0]) ||
       isa<ConstantDataVector>(Operands[0])) &&
      // Check for default rounding mode.
      // FIXME: Support other rounding modes?
      isa<ConstantInt>(Operands[1]) &&
      cast<ConstantInt>(Operands[1])->getValue() == 4) {
    auto *Op = cast<Constant>(Operands[0]);
    switch (IntrinsicID) {
    default: break;
    case Intrinsic::x86_avx512_vcvtss2si32:
    case Intrinsic::x86_avx512_vcvtss2si64:
    case Intrinsic::x86_avx512_vcvtsd2si32:
    case Intrinsic::x86_avx512_vcvtsd2si64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/false, Ty,
                                           /*IsSigned*/true);
      break;
    case Intrinsic::x86_avx512_vcvtss2usi32:
    case Intrinsic::x86_avx512_vcvtss2usi64:
    case Intrinsic::x86_avx512_vcvtsd2usi32:
    case Intrinsic::x86_avx512_vcvtsd2usi64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/false, Ty,
                                           /*IsSigned*/false);
      break;
    case Intrinsic::x86_avx512_cvttss2si:
    case Intrinsic::x86_avx512_cvttss2si64:
    case Intrinsic::x86_avx512_cvttsd2si:
    case Intrinsic::x86_avx512_cvttsd2si64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/true, Ty,
                                           /*IsSigned*/true);
      break;
    case Intrinsic::x86_avx512_cvttss2usi:
    case Intrinsic::x86_avx512_cvttss2usi64:
    case Intrinsic::x86_avx512_cvttsd2usi:
    case Intrinsic::x86_avx512_cvttsd2usi64:
      if (ConstantFP *FPOp =
              dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
        return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                           /*roundTowardZero=*/true, Ty,
                                           /*IsSigned*/false);
      break;
    }
  }
  return nullptr;
}
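
// Evaluate one of the amdgcn cube intrinsics (cubeid, cubema, cubesc, cubetc)
// on constant inputs by selecting the major axis of the (S0, S1, S2) vector
// and deriving the face id, major axis and S/T coordinates from it.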
static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
                                               const APFloat &S0,
                                               const APFloat &S1,
                                               const APFloat &S2) {
  unsigned ID;
  const fltSemantics &Sem = S0.getSemantics();
  APFloat MA(Sem), SC(Sem), TC(Sem);
  if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) {
    if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {
      // S2 < 0
      ID = 5;
      SC = -S0;
    } else {
      ID = 4;
      SC = S0;
    }
    MA = S2;
    TC = -S1;
  } else if (abs(S1) >= abs(S0)) {
    if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {
      // S1 < 0
      ID = 3;
      TC = -S2;
    } else {
      ID = 2;
      TC = S2;
    }
    MA = S1;
    SC = S0;
  } else {
    if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {
      // S0 < 0
      ID = 1;
      SC = S2;
    } else {
      ID = 0;
      SC = -S2;
    }
    MA = S0;
    TC = -S1;
  }
  switch (IntrinsicID) {
  default:
    llvm_unreachable("unhandled amdgcn cube intrinsic");
  case Intrinsic::amdgcn_cubeid:
    return APFloat(Sem, ID);
  case Intrinsic::amdgcn_cubema:
    return MA + MA;
  case Intrinsic::amdgcn_cubesc:
    return SC;
  case Intrinsic::amdgcn_cubetc:
    return TC;
  }
}
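
// Constant fold a call to the amdgcn.perm intrinsic: build a 32-bit result by
// selecting bytes from the first two operands according to the per-byte
// selectors in the third operand (selectors 12 and >= 13 produce the constant
// bytes 0x00 and 0xff respectively).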
static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
                                                 Type *Ty) {
  const APInt *C0, *C1, *C2;
  if (!getConstIntOrUndef(Operands[0], C0) ||
      !getConstIntOrUndef(Operands[1], C1) ||
      !getConstIntOrUndef(Operands[2], C2))
    return nullptr;

  if (!C2)
    return UndefValue::get(Ty);

  APInt Val(32, 0);
  unsigned NumUndefBytes = 0;
  for (unsigned I = 0; I < 32; I += 8) {
    unsigned Sel = C2->extractBitsAsZExtValue(8, I);
    unsigned B = 0;
    if (Sel >= 13)
      B = 0xff;
    else if (Sel == 12)
      B = 0x00;
    else {
      const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
      if (!Src)
        ++NumUndefBytes;
      else if (Sel < 8)
        B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);
      else
        B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;
    }
    Val.insertBits(B, I, 8);
  }

  if (NumUndefBytes == 4)
    return UndefValue::get(Ty);

  return ConstantInt::get(Ty, Val);
}
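
// Constant folding for intrinsics that take three scalar operands:
// constrained and unconstrained fma/fmuladd, the amdgcn cube intrinsics,
// fixed-point multiplies, funnel shifts and amdgcn.perm.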
static Constant *ConstantFoldScalarCall3(StringRef Name,
                                         Intrinsic::ID IntrinsicID,
                                         Type *Ty,
                                         ArrayRef<Constant *> Operands,
                                         const TargetLibraryInfo *TLI,
                                         const CallBase *Call) {
  assert(Operands.size() == 3 && "Wrong number of operands.");

  if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
    if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
      if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
        const APFloat &C1 = Op1->getValueAPF();
        const APFloat &C2 = Op2->getValueAPF();
        const APFloat &C3 = Op3->getValueAPF();

        if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
          RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
          APFloat Res = C1;
          APFloat::opStatus St;
          switch (IntrinsicID) {
          default:
            return nullptr;
          case Intrinsic::experimental_constrained_fma:
          case Intrinsic::experimental_constrained_fmuladd:
            St = Res.fusedMultiplyAdd(C2, C3, RM);
            break;
          }
          if (mayFoldConstrained(
                  const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
            return ConstantFP::get(Ty->getContext(), Res);
          return nullptr;
        }

        switch (IntrinsicID) {
        default: break;
        case Intrinsic::amdgcn_fma_legacy: {
          // The legacy behaviour is that multiplying +/- 0.0 by anything, even
          // NaN or infinity, gives +0.0.
          if (C1.isZero() || C2.isZero()) {
            // It's tempting to just return C3 here, but that would give the
            // wrong result if C3 was -0.0.
            return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);
          }
          [[fallthrough]];
        }
        case Intrinsic::fma:
        case Intrinsic::fmuladd: {
          APFloat V = C1;
          V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven);
          return ConstantFP::get(Ty->getContext(), V);
        }
        case Intrinsic::amdgcn_cubeid:
        case Intrinsic::amdgcn_cubema:
        case Intrinsic::amdgcn_cubesc:
        case Intrinsic::amdgcn_cubetc: {
          APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3);
          return ConstantFP::get(Ty->getContext(), V);
        }
        }
      }
    }
  }
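
  // Fixed-point multiplication: compute the product in twice the bit width,
  // shift right by the scale and, for the saturating variant, clamp to the
  // signed min/max of the original width.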
  if (IntrinsicID == Intrinsic::smul_fix ||
      IntrinsicID == Intrinsic::smul_fix_sat) {
    // poison * C -> poison
    // C * poison -> poison
    if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
      return PoisonValue::get(Ty);

    const APInt *C0, *C1;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1))
      return nullptr;

    // undef * C -> 0
    // C * undef -> 0
    if (!C0 || !C1)
      return Constant::getNullValue(Ty);

    // This code performs rounding towards negative infinity in case the result
    // cannot be represented exactly for the given scale. Targets that do care
    // about rounding should use a target hook for specifying how rounding
    // should be done, and provide their own folding to be consistent with
    // rounding. This is the same approach as used by
    // DAGTypeLegalizer::ExpandIntRes_MULFIX.
    unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue();
    unsigned Width = C0->getBitWidth();
    assert(Scale < Width && "Illegal scale.");
    unsigned ExtendedWidth = Width * 2;
    APInt Product =
        (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale);
    if (IntrinsicID == Intrinsic::smul_fix_sat) {
      APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth);
      APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth);
      Product = APIntOps::smin(Product, Max);
      Product = APIntOps::smax(Product, Min);
    }
    return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width));
  }
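
  // Funnel shifts: concatenate the first two operands and shift by the third
  // operand, which is interpreted modulo the bit width.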
  if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
    const APInt *C0, *C1, *C2;
    if (!getConstIntOrUndef(Operands[0], C0) ||
        !getConstIntOrUndef(Operands[1], C1) ||
        !getConstIntOrUndef(Operands[2], C2))
      return nullptr;

    bool IsRight = IntrinsicID == Intrinsic::fshr;
    if (!C2)
      return Operands[IsRight ? 1 : 0];
    if (!C0 && !C1)
      return UndefValue::get(Ty);

    // The shift amount is interpreted as modulo the bitwidth. If the shift
    // amount is effectively 0, avoid UB due to oversized inverse shift below.
    unsigned BitWidth = C2->getBitWidth();
    unsigned ShAmt = C2->urem(BitWidth);
    if (!ShAmt)
      return Operands[IsRight ? 1 : 0];

    // (C0 << ShlAmt) | (C1 >> LshrAmt)
    unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
    unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
    if (!C0)
      return ConstantInt::get(Ty, C1->lshr(LshrAmt));
    if (!C1)
      return ConstantInt::get(Ty, C0->shl(ShlAmt));
    return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
  }

  if (IntrinsicID == Intrinsic::amdgcn_perm)
    return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);

  return nullptr;
}
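
// Dispatch scalar calls to the unary, binary or ternary folder based on the
// number of operands.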
static Constant *ConstantFoldScalarCall(StringRef Name,
                                        Intrinsic::ID IntrinsicID,
                                        Type *Ty,
                                        ArrayRef<Constant *> Operands,
                                        const TargetLibraryInfo *TLI,
                                        const CallBase *Call) {
  if (Operands.size() == 1)
    return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);

  if (Operands.size() == 2)
    return ConstantFoldScalarCall2(Name, IntrinsicID, Ty, Operands, TLI, Call);

  if (Operands.size() == 3)
    return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);

  return nullptr;
}
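
// Fold a call that returns a fixed-width vector. A few intrinsics are handled
// specially; everything else is folded lane by lane with the scalar folder.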
static Constant *ConstantFoldFixedVectorCall(
    StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy,
    ArrayRef<Constant *> Operands, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const CallBase *Call) {
  SmallVector<Constant *, 4> Result(FVTy->getNumElements());
  SmallVector<Constant *, 4> Lane(Operands.size());
  Type *Ty = FVTy->getElementType();

  switch (IntrinsicID) {
  case Intrinsic::masked_load: {
    auto *SrcPtr = Operands[0];
    auto *Mask = Operands[2];
    auto *Passthru = Operands[3];

    Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);

    SmallVector<Constant *, 32> NewElements;
    for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
      auto *MaskElt = Mask->getAggregateElement(I);
      if (!MaskElt)
        break;
      auto *PassthruElt = Passthru->getAggregateElement(I);
      auto *VecElt = VecData ? VecData->getAggregateElement(I) : nullptr;
      if (isa<UndefValue>(MaskElt)) {
        if (PassthruElt)
          NewElements.push_back(PassthruElt);
        else if (VecElt)
          NewElements.push_back(VecElt);
        else
          return nullptr;
      }
      if (MaskElt->isNullValue()) {
        if (!PassthruElt)
          return nullptr;
        NewElements.push_back(PassthruElt);
      } else if (MaskElt->isOneValue()) {
        if (!VecElt)
          return nullptr;
        NewElements.push_back(VecElt);
      } else {
        return nullptr;
      }
    }
    if (NewElements.size() != FVTy->getNumElements())
      return nullptr;
    return ConstantVector::get(NewElements);
  }
  case Intrinsic::arm_mve_vctp8:
  case Intrinsic::arm_mve_vctp16:
  case Intrinsic::arm_mve_vctp32:
  case Intrinsic::arm_mve_vctp64: {
    if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
      unsigned Lanes = FVTy->getNumElements();
      uint64_t Limit = Op->getZExtValue();

      SmallVector<Constant *, 16> NCs;
      for (unsigned i = 0; i < Lanes; i++) {
        if (i < Limit)
          NCs.push_back(ConstantInt::getTrue(Ty));
        else
          NCs.push_back(ConstantInt::getFalse(Ty));
      }
      return ConstantVector::get(NCs);
    }
    return nullptr;
  }
  case Intrinsic::get_active_lane_mask: {
    auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
    auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
    if (Op0 && Op1) {
      unsigned Lanes = FVTy->getNumElements();
      uint64_t Base = Op0->getZExtValue();
      uint64_t Limit = Op1->getZExtValue();

      SmallVector<Constant *, 16> NCs;
      for (unsigned i = 0; i < Lanes; i++) {
        if (Base + i < Limit)
          NCs.push_back(ConstantInt::getTrue(Ty));
        else
          NCs.push_back(ConstantInt::getFalse(Ty));
      }
      return ConstantVector::get(NCs);
    }
    return nullptr;
  }
  default:
    break;
  }

  for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
    // Gather a column of constants.
    for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
      // Some intrinsics use a scalar type for certain arguments.
      if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J)) {
        Lane[J] = Operands[J];
        continue;
      }

      Constant *Agg = Operands[J]->getAggregateElement(I);
      if (!Agg)
        return nullptr;
      Lane[J] = Agg;
    }

    // Use the regular scalar folding to simplify this column.
    Constant *Folded =
        ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
    if (!Folded)
      return nullptr;
    Result[I] = Folded;
  }

  return ConstantVector::get(Result);
}
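
// Fold a call that returns a scalable vector. Only a handful of cases can be
// folded without knowing the runtime vector length.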
static Constant *ConstantFoldScalableVectorCall(
    StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,
    ArrayRef<Constant *> Operands, const DataLayout &DL,
    const TargetLibraryInfo *TLI, const CallBase *Call) {
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_convert_from_svbool: {
    auto *Src = dyn_cast<Constant>(Operands[0]);
    if (!Src || !Src->isNullValue())
      break;

    return ConstantInt::getFalse(SVTy);
  }
  default:
    break;
  }
  return nullptr;
}

} // end anonymous namespace
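
// Public entry point: attempt to fold a call to F with the given constant
// operands, dispatching on the return type (fixed vector, scalable vector or
// scalar).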
Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
                                 ArrayRef<Constant *> Operands,
                                 const TargetLibraryInfo *TLI) {
  if (Call->isNoBuiltin())
    return nullptr;
  if (!F->hasName())
    return nullptr;

  // If this is not an intrinsic and not recognized as a library call, bail out.
  if (F->getIntrinsicID() == Intrinsic::not_intrinsic) {
    if (!TLI)
      return nullptr;
    LibFunc LibF;
    if (!TLI->getLibFunc(*F, LibF))
      return nullptr;
  }

  StringRef Name = F->getName();
  Type *Ty = F->getReturnType();
  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
    return ConstantFoldFixedVectorCall(
        Name, F->getIntrinsicID(), FVTy, Operands,
        F->getParent()->getDataLayout(), TLI, Call);

  if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))
    return ConstantFoldScalableVectorCall(
        Name, F->getIntrinsicID(), SVTy, Operands,
        F->getParent()->getDataLayout(), TLI, Call);

  // TODO: If this is a library function, we already discovered that above,
  //       so we should pass the LibFunc, not the name (and it might be better
  //       still to separate intrinsic handling from libcalls).
  return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI,
                                Call);
}
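
// Conservatively determine whether a math library call with the given
// constant argument(s) is known not to raise a floating-point exception or
// set errno, i.e. whether the call can be treated as having no side effects.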
bool llvm::isMathLibCallNoop(const CallBase *Call,
                             const TargetLibraryInfo *TLI) {
  // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
  // (and to some extent ConstantFoldScalarCall).
  if (Call->isNoBuiltin() || Call->isStrictFP())
    return false;
  Function *F = Call->getCalledFunction();
  if (!F)
    return false;

  LibFunc Func;
  if (!TLI || !TLI->getLibFunc(*F, Func))
    return false;

  if (Call->arg_size() == 1) {
    if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
      const APFloat &Op = OpC->getValueAPF();
      switch (Func) {
      case LibFunc_logl:
      case LibFunc_log:
      case LibFunc_logf:
      case LibFunc_log2l:
      case LibFunc_log2:
      case LibFunc_log2f:
      case LibFunc_log10l:
      case LibFunc_log10:
      case LibFunc_log10f:
        return Op.isNaN() || (!Op.isZero() && !Op.isNegative());

      case LibFunc_expl:
      case LibFunc_exp:
      case LibFunc_expf:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
        break;

      case LibFunc_exp2l:
      case LibFunc_exp2:
      case LibFunc_exp2f:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
        break;

      case LibFunc_sinl:
      case LibFunc_sin:
      case LibFunc_sinf:
      case LibFunc_cosl:
      case LibFunc_cos:
      case LibFunc_cosf:
        return !Op.isInfinity();

      case LibFunc_tanl:
      case LibFunc_tan:
      case LibFunc_tanf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = OpC->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
          return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr;
        break;
      }

      case LibFunc_atan:
      case LibFunc_atanf:
      case LibFunc_atanl:
        // Per POSIX, this MAY fail if Op is denormal. We choose not failing.
        return true;

      case LibFunc_asinl:
      case LibFunc_asin:
      case LibFunc_asinf:
      case LibFunc_acosl:
      case LibFunc_acos:
      case LibFunc_acosf:
        return !(Op < APFloat(Op.getSemantics(), "-1") ||
                 Op > APFloat(Op.getSemantics(), "1"));

      case LibFunc_sinh:
      case LibFunc_cosh:
      case LibFunc_sinhf:
      case LibFunc_coshf:
      case LibFunc_sinhl:
      case LibFunc_coshl:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
        break;

      case LibFunc_sqrtl:
      case LibFunc_sqrt:
      case LibFunc_sqrtf:
        return Op.isNaN() || Op.isZero() || !Op.isNegative();

      // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
      // maybe others?
      default:
        break;
      }
    }
  }

  if (Call->arg_size() == 2) {
    ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
    ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
    if (Op0C && Op1C) {
      const APFloat &Op0 = Op0C->getValueAPF();
      const APFloat &Op1 = Op1C->getValueAPF();

      switch (Func) {
      case LibFunc_powl:
      case LibFunc_pow:
      case LibFunc_powf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = Op0C->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
          if (Ty == Op1C->getType())
            return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr;
        }
        break;
      }

      case LibFunc_fmodl:
      case LibFunc_fmod:
      case LibFunc_fmodf:
      case LibFunc_remainderl:
      case LibFunc_remainder:
      case LibFunc_remainderf:
        return Op0.isNaN() || Op1.isNaN() ||
               (!Op0.isInfinity() && !Op1.isZero());

      case LibFunc_atan2:
      case LibFunc_atan2f:
      case LibFunc_atan2l:
        // Although IEEE-754 says atan2(+/-0.0, +/-0.0) are well-defined, and
        // GLIBC and MSVC do not appear to raise an error on those, we
        // cannot rely on that behavior. POSIX and C11 say that a domain error
        // may occur, so allow for that possibility.
        return !Op0.isZero() || !Op1.isZero();

      default:
        break;
      }
    }
  }

  return false;
}

void TargetFolder::anchor() {}