
//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Analyze 'Val', seeing if it is a simple linear expression.
/// If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
///
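/// For example, assuming the required 'nuw' flags are present, an input of
/// ((X << 2) + 12) decomposes to X with Scale == 4 and Offset == 12.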
static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
                                        uint64_t &Offset) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    Offset = CI->getZExtValue();
    Scale = 0;
    return ConstantInt::get(Val->getType(), 0);
  }

  if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
    // Cannot look past anything that might overflow.
    // We specifically require nuw because we store the Scale in an unsigned
    // and perform an unsigned divide on it.
    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
    if (OBI && !OBI->hasNoUnsignedWrap()) {
      Scale = 1;
      Offset = 0;
      return Val;
    }

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
      if (I->getOpcode() == Instruction::Shl) {
        // This is a value scaled by '1 << the shift amt'.
        Scale = UINT64_C(1) << RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Mul) {
        // This value is scaled by 'RHS'.
        Scale = RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Add) {
        // We have X+C. Check to see if we really have (X*C2)+C1,
        // where C1 is divisible by C2.
        unsigned SubScale;
        Value *SubVal =
            decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
        Offset += RHS->getZExtValue();
        Scale = SubScale;
        return SubVal;
      }
    }
  }

  // Otherwise, we can't look past this.
  Scale = 1;
  Offset = 0;
  return Val;
}

/// If we find a cast of an allocation instruction, try to eliminate the cast
/// by moving the type information into the alloc.
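/// For example (illustrative, typed-pointer IR): a bitcast of
/// 'alloca i32, i32 4' to i64* can be rewritten as 'alloca i64, i32 2', with a
/// bitcast back to i32* inserted for any other users of the original alloca.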
Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
                                                       AllocaInst &AI) {
  PointerType *PTy = cast<PointerType>(CI.getType());
  // Opaque pointers don't have an element type we could replace with.
  if (PTy->isOpaque())
    return nullptr;

  IRBuilderBase::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(&AI);

  // Get the type really allocated and the type casted to.
  Type *AllocElTy = AI.getAllocatedType();
  Type *CastElTy = PTy->getNonOpaquePointerElementType();
  if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;

  // This optimisation does not work for cases where the cast type
  // is scalable and the allocated type is not. This is because we need to
  // know how many times the casted type fits into the allocated type.
  // For the opposite case where the allocated type is scalable and the
  // cast type is not, this leads to poor code quality due to the
  // introduction of 'vscale' into the calculations. It seems better to
  // bail out for this case too until we've done a proper cost-benefit
  // analysis.
  bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
  bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
  if (AllocIsScalable != CastIsScalable) return nullptr;

  Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
  Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
  if (CastElTyAlign < AllocElTyAlign) return nullptr;

  // If the allocation has multiple uses, only promote it if we are strictly
  // increasing the alignment of the resultant allocation. If we keep it the
  // same, we open the door to infinite loops of various kinds.
  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;

  // The alloc and cast types should be either both fixed or both scalable.
  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinValue();
  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinValue();
  if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;

  // If the allocation has multiple uses, only promote it if we're not
  // shrinking the amount of memory being allocated.
  uint64_t AllocElTyStoreSize =
      DL.getTypeStoreSize(AllocElTy).getKnownMinValue();
  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinValue();
  if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;

  // See if we can satisfy the modulus by pulling a scale out of the array
  // size argument.
  unsigned ArraySizeScale;
  uint64_t ArrayOffset;
  Value *NumElements = // See if the array size is a decomposable linear expr.
      decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);

  // If we can now satisfy the modulus, by using a non-1 scale, we really can
  // do the xform.
  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
      (AllocElTySize*ArrayOffset) % CastElTySize != 0) return nullptr;

  // We don't currently support arrays of scalable types.
  assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));

  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
  Value *Amt = nullptr;
  if (Scale == 1) {
    Amt = NumElements;
  } else {
    Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
    // Insert before the alloca, not before the cast.
    Amt = Builder.CreateMul(Amt, NumElements);
  }

  if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
    Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
                                  Offset, true);
    Amt = Builder.CreateAdd(Amt, Off);
  }

  AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
  New->setAlignment(AI.getAlign());
  New->takeName(&AI);
  New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
  New->setMetadata(LLVMContext::MD_DIAssignID,
                   AI.getMetadata(LLVMContext::MD_DIAssignID));

  replaceAllDbgUsesWith(AI, *New, *New, DT);

  // If the allocation has multiple real uses, insert a cast and change all
  // things that used it to use the new cast. This will also hack on CI, but it
  // will die soon.
  if (!AI.hasOneUse()) {
    // New is the allocation instruction, pointer typed. AI is the original
    // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
    Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
    replaceInstUsesWith(AI, NewCast);
    eraseInstFromFunction(AI);
  }
  return replaceInstUsesWith(CI, New);
}

/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
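/// For example, when asked to evaluate (add i32 (zext i16 %a to i32), 42) in
/// type i16, this rebuilds the expression as (add i16 %a, 42).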
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V)) {
    C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
    // If we got a constantexpr back, try to simplify it with DL info.
    return ConstantFoldConstant(C, DL, &TLI);
  }

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source. There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    Res = CastInst::Create(
        static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
    break;
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, *I);
}

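// Determine whether a pair of casts can be folded into a single cast and, if
// so, return the opcode of that single cast. For example, a zext i8 -> i16
// followed by a zext i16 -> i32 folds to one zext i8 -> i32, so ZExt is
// returned for that pair; a result of zero means the pair is not eliminable.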
Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}

/// Implement the transforms common to all CastInst visitors.
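/// For example, this folds a cast of a cast: a trunc i32 -> i16 feeding a
/// trunc i16 -> i8 is replaced by a single trunc i32 -> i8 when the pair is
/// eliminable. It also sinks casts into selects and phis where profitable.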
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {
      if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
        replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
        return NV;
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, Mask);
    }
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return true;
  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we can not evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  assert(!isa<Constant>(V) && "Constant should already be handled.");
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V. We should return true if trunc(V)
/// can be computed by computing V in the smaller type. If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
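/// For example, (trunc (xor i32 (zext i8 %a to i32), 7) to i8) can instead be
/// computed as (xor i8 %a, 7), so this returns true for that xor; a udiv, in
/// contrast, only qualifies when the bits that would be truncated away are
/// known zero in both operands.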
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // inrange amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    // zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    // similar to sign bit of the truncate type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI: {
    // If the integer type can hold the max FP value, it is safe to cast
    // directly to that type. Otherwise, we may create poison via overflow
    // that did not exist in the original code.
    //
    // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
    // at least one more bit than the MaxExponent to hold the max FP value.
    Type *InputTy = I->getOperand(0)->getType()->getScalarType();
    const fltSemantics &Semantics = InputTy->getFltSemantics();
    uint32_t MinBitWidth = APFloatBase::semanticsMaxExponent(Semantics);
    // Extra sign bit needed.
    if (I->getOpcode() == Instruction::FPToSI)
      ++MinBitWidth;
    return Ty->getScalarSizeInBits() > MinBitWidth;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
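/// For example, with an i32 value X whose upper 24 bits are known to be zero,
/// trunc (or (shl X, C), (lshr X, 8 - C)) to i8 becomes a call to llvm.fshl.i8
/// on the truncated operands.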
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount can not over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // We have an unnecessarily wide rotate!
  // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
  // Narrow the inputs and convert to funnel shift intrinsic:
  // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
  Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
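/// For example, (trunc (xor i32 %x, 5) to i8) becomes (xor i8 (trunc %x), 5),
/// so only the narrow value has to be computed.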
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }
  case Instruction::LShr:
  case Instruction::AShr: {
    // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
    Value *A;
    Constant *C;
    if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
      unsigned MaxShiftAmt = SrcWidth - DestWidth;
      // If the shift is small enough, all zero/sign bits created by the shift
      // are removed by the trunc.
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                      APInt(SrcWidth, MaxShiftAmt)))) {
        auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
        bool IsExact = OldShift->isExact();
        auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        Value *Shift =
            OldShift->getOpcode() == Instruction::AShr
                ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
                : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
        return CastInst::CreateTruncOrBitCast(Shift, DestTy);
      }
    }
    break;
  }
  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      all_equal(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
    // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc   (inselt undef, X, Index) --> inselt undef,   (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type. Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
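  // For example, a trunc i64 -> i8 whose whole source expression can be
  // evaluated in i16 is rewritten as a trunc of that narrower i16 expression.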
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);
    if (DestTy->isIntegerTy()) {
      // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
      // TODO: We canonicalize to more instructions here because we are probably
      // lacking equivalent analysis for trunc relative to icmp. There may also
      // be codegen concerns. If those trunc limitations were removed, we could
      // remove this transform.
      Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    // For vectors, we do not canonicalize all truncs to icmp, so optimize
    // patterns that would be covered within visitICmpInst.
    Value *X;
    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_Constant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      MaskC = ConstantExpr::getOr(MaskC, One);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
  }

  Value *A, *B;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false);
        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false);
        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shift by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend in reg pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // A badly fit destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
                                                       m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale(DL))) {
    if (Trunc.getFunction() &&
        Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(*MaxVScale) < DestWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Trunc, VScale);
        }
      }
    }
  }

  return nullptr;
}

Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
                                                 ZExtInst &Zext) {
  // If we are just checking for a icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.
  // FIXME: This set of transforms does not check for extra uses and/or creates
  //        an extra instruction (an optional final cast is not included
  //        in the transform comments). We may also want to favor icmp over
  //        shifts in cases of equal instructions because icmp has better
  //        analysis in general (invert the transform).

  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s 0) to i32 --> x>>u31   true if signbit set.
    if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
      Value *In = Cmp->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
    if (Op1CV->isZero() && Cmp->isEquality() &&
        (Cmp->getOperand(0)->getType() == Zext.getType() ||
         Cmp->getPredicate() == ICmpInst::ICMP_NE)) {
      // If Op1C some other power of two, convert:
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);

      // Exactly 1 possible 1? But not the high-bit because that is
      // canonicalized to this form.
      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2() &&
          (Zext.getType()->getScalarSizeInBits() !=
           KnownZeroMask.logBase2() + 1)) {
        uint32_t ShAmt = KnownZeroMask.logBase2();
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        // Toggle the low bit for "X == 0".
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
    // Test if a bit is clear/set using a shifted-one mask:
    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
    Value *X, *ShAmt;
    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
        match(Cmp->getOperand(0),
              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
      if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
        X = Builder.CreateNot(X);
      Value *Lshr = Builder.CreateLShr(X, ShAmt);
      Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
      return replaceInstUsesWith(Zext, And1);
    }
  }

  return nullptr;
}

  987. /// Determine if the specified value can be computed in the specified wider type
  988. /// and produce the same low bits. If not, return false.
  989. ///
  990. /// If this function returns true, it can also return a non-zero number of bits
  991. /// (in BitsToClear) which indicates that the value it computes is correct for
  992. /// the zero extend, but that the additional BitsToClear bits need to be zero'd
  993. /// out. For example, to promote something like:
  994. ///
  995. /// %B = trunc i64 %A to i32
  996. /// %C = lshr i32 %B, 8
  997. /// %E = zext i32 %C to i64
  998. ///
  999. /// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
  1000. /// set to 8 to indicate that the promoted value needs to have bits 24-31
  1001. /// cleared in addition to bits 32-63. Since an 'and' will be generated to
  1002. /// clear the top bits anyway, doing this has no extra cost.
  1003. ///
  1004. /// This function works on both vectors and scalars.
  1005. static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
  1006. InstCombinerImpl &IC, Instruction *CxtI) {
  1007. BitsToClear = 0;
  1008. if (canAlwaysEvaluateInType(V, Ty))
  1009. return true;
  1010. if (canNotEvaluateInType(V, Ty))
  1011. return false;
  1012. auto *I = cast<Instruction>(V);
  1013. unsigned Tmp;
  1014. switch (I->getOpcode()) {
  1015. case Instruction::ZExt: // zext(zext(x)) -> zext(x).
  1016. case Instruction::SExt: // zext(sext(x)) -> sext(x).
  1017. case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
  1018. return true;
  1019. case Instruction::And:
  1020. case Instruction::Or:
  1021. case Instruction::Xor:
  1022. case Instruction::Add:
  1023. case Instruction::Sub:
  1024. case Instruction::Mul:
  1025. if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
  1026. !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
  1027. return false;
  1028. // These can all be promoted if neither operand has 'bits to clear'.
  1029. if (BitsToClear == 0 && Tmp == 0)
  1030. return true;
  1031. // If the operation is an AND/OR/XOR and the bits to clear are zero in the
  1032. // other side, BitsToClear is ok.
  1033. if (Tmp == 0 && I->isBitwiseLogicOp()) {
  1034. // We use MaskedValueIsZero here for generality, but the case we care
  1035. // about the most is constant RHS.
  1036. unsigned VSize = V->getType()->getScalarSizeInBits();
  1037. if (IC.MaskedValueIsZero(I->getOperand(1),
  1038. APInt::getHighBitsSet(VSize, BitsToClear),
  1039. 0, CxtI)) {
  1040. // If this is an And instruction and all of the BitsToClear are
  1041. // known to be zero we can reset BitsToClear.
  1042. if (I->getOpcode() == Instruction::And)
  1043. BitsToClear = 0;
  1044. return true;
  1045. }
  1046. }
  1047. // Otherwise, we don't know how to analyze this BitsToClear case yet.
  1048. return false;
  1049. case Instruction::Shl: {
  1050. // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
  1051. // upper bits we can reduce BitsToClear by the shift amount.
  1052. const APInt *Amt;
  1053. if (match(I->getOperand(1), m_APInt(Amt))) {
  1054. if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
  1055. return false;
  1056. uint64_t ShiftAmt = Amt->getZExtValue();
  1057. BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
  1058. return true;
  1059. }
  1060. return false;
  1061. }
  1062. case Instruction::LShr: {
  1063. // We can promote lshr(x, cst) if we can promote x. This requires the
1064. // ultimate 'and' to also clear out the extra high bits this shifts in, though.
  1065. const APInt *Amt;
  1066. if (match(I->getOperand(1), m_APInt(Amt))) {
  1067. if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
  1068. return false;
  1069. BitsToClear += Amt->getZExtValue();
  1070. if (BitsToClear > V->getType()->getScalarSizeInBits())
  1071. BitsToClear = V->getType()->getScalarSizeInBits();
  1072. return true;
  1073. }
  1074. // Cannot promote variable LSHR.
  1075. return false;
  1076. }
  1077. case Instruction::Select:
  1078. if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
  1079. !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
  1080. // TODO: If important, we could handle the case when the BitsToClear are
  1081. // known zero in the disagreeing side.
  1082. Tmp != BitsToClear)
  1083. return false;
  1084. return true;
  1085. case Instruction::PHI: {
  1086. // We can change a phi if we can change all operands. Note that we never
  1087. // get into trouble with cyclic PHIs here because we only consider
  1088. // instructions with a single use.
  1089. PHINode *PN = cast<PHINode>(I);
  1090. if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
  1091. return false;
  1092. for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
  1093. if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
  1094. // TODO: If important, we could handle the case when the BitsToClear
  1095. // are known zero in the disagreeing input.
  1096. Tmp != BitsToClear)
  1097. return false;
  1098. return true;
  1099. }
  1100. default:
  1101. // TODO: Can handle more cases here.
  1102. return false;
  1103. }
  1104. }
  1105. Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
  1106. // If this zero extend is only used by a truncate, let the truncate be
  1107. // eliminated before we try to optimize this zext.
  1108. if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()))
  1109. return nullptr;
1110. // If one of the common conversions will work, do it.
  1111. if (Instruction *Result = commonCastTransforms(Zext))
  1112. return Result;
  1113. Value *Src = Zext.getOperand(0);
  1114. Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
  1115. // Try to extend the entire expression tree to the wide destination type.
  1116. unsigned BitsToClear;
  1117. if (shouldChangeType(SrcTy, DestTy) &&
  1118. canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
  1119. assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
  1120. "Can't clear more bits than in SrcTy");
  1121. // Okay, we can transform this! Insert the new expression now.
  1122. LLVM_DEBUG(
  1123. dbgs() << "ICE: EvaluateInDifferentType converting expression type"
  1124. " to avoid zero extend: "
  1125. << Zext << '\n');
  1126. Value *Res = EvaluateInDifferentType(Src, DestTy, false);
  1127. assert(Res->getType() == DestTy);
  1128. // Preserve debug values referring to Src if the zext is its last use.
  1129. if (auto *SrcOp = dyn_cast<Instruction>(Src))
  1130. if (SrcOp->hasOneUse())
  1131. replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);
  1132. uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
  1133. uint32_t DestBitSize = DestTy->getScalarSizeInBits();
  1134. // If the high bits are already filled with zeros, just replace this
  1135. // cast with the result.
  1136. if (MaskedValueIsZero(Res,
  1137. APInt::getHighBitsSet(DestBitSize,
  1138. DestBitSize - SrcBitsKept),
  1139. 0, &Zext))
  1140. return replaceInstUsesWith(Zext, Res);
  1141. // We need to emit an AND to clear the high bits.
  1142. Constant *C = ConstantInt::get(Res->getType(),
  1143. APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
  1144. return BinaryOperator::CreateAnd(Res, C);
  1145. }
  1146. // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  1147. // types and if the sizes are just right we can convert this into a logical
  1148. // 'and' which will be much cheaper than the pair of casts.
  1149. if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
  1150. // TODO: Subsume this into EvaluateInDifferentType.
  1151. // Get the sizes of the types involved. We know that the intermediate type
  1152. // will be smaller than A or C, but don't know the relation between A and C.
  1153. Value *A = CSrc->getOperand(0);
  1154. unsigned SrcSize = A->getType()->getScalarSizeInBits();
  1155. unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
  1156. unsigned DstSize = DestTy->getScalarSizeInBits();
  1157. // If we're actually extending zero bits, then if
  1158. // SrcSize < DstSize: zext(a & mask)
  1159. // SrcSize == DstSize: a & mask
  1160. // SrcSize > DstSize: trunc(a) & mask
  1161. if (SrcSize < DstSize) {
  1162. APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
  1163. Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
  1164. Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
  1165. return new ZExtInst(And, DestTy);
  1166. }
  1167. if (SrcSize == DstSize) {
  1168. APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
  1169. return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
  1170. AndValue));
  1171. }
  1172. if (SrcSize > DstSize) {
  1173. Value *Trunc = Builder.CreateTrunc(A, DestTy);
  1174. APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
  1175. return BinaryOperator::CreateAnd(Trunc,
  1176. ConstantInt::get(Trunc->getType(),
  1177. AndValue));
  1178. }
  1179. }
  1180. if (auto *Cmp = dyn_cast<ICmpInst>(Src))
  1181. return transformZExtICmp(Cmp, Zext);
  1182. // zext(trunc(X) & C) -> (X & zext(C)).
  1183. Constant *C;
  1184. Value *X;
  1185. if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
  1186. X->getType() == DestTy)
  1187. return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, DestTy));
  1188. // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  1189. Value *And;
  1190. if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
  1191. match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
  1192. X->getType() == DestTy) {
  1193. Constant *ZC = ConstantExpr::getZExt(C, DestTy);
  1194. return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  1195. }
  1196. // If we are truncating, masking, and then zexting back to the original type,
1197. // that's just a mask. This is not handled by canEvaluateZExtd if the
  1198. // intermediate values have extra uses. This could be generalized further for
  1199. // a non-constant mask operand.
  1200. // zext (and (trunc X), C) --> and X, (zext C)
  1201. if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
  1202. X->getType() == DestTy) {
  1203. Constant *ZextC = ConstantExpr::getZExt(C, DestTy);
  1204. return BinaryOperator::CreateAnd(X, ZextC);
  1205. }
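// As in visitTrunc, a zext of vscale can be rewritten as a vscale call at the
// wide destination type when the vscale_range attribute shows the value fits
// in the source width, so the zero extension adds no information.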
  1206. if (match(Src, m_VScale(DL))) {
  1207. if (Zext.getFunction() &&
  1208. Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
  1209. Attribute Attr =
  1210. Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
  1211. if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
  1212. unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
  1213. if (Log2_32(*MaxVScale) < TypeWidth) {
  1214. Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
  1215. return replaceInstUsesWith(Zext, VScale);
  1216. }
  1217. }
  1218. }
  1219. }
  1220. return nullptr;
  1221. }
  1222. /// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
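/// For example (illustrative IR; value names are made up):
///   %c = icmp slt i32 %x, 0
///   %s = sext i1 %c to i32
/// becomes
///   %s = ashr i32 %x, 31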
  1223. Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
  1224. SExtInst &Sext) {
  1225. Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
  1226. ICmpInst::Predicate Pred = Cmp->getPredicate();
  1227. // Don't bother if Op1 isn't of vector or integer type.
  1228. if (!Op1->getType()->isIntOrIntVectorTy())
  1229. return nullptr;
  1230. if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
  1231. // sext (x <s 0) --> ashr x, 31 (all ones if negative)
  1232. Value *Sh = ConstantInt::get(Op0->getType(),
  1233. Op0->getType()->getScalarSizeInBits() - 1);
  1234. Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
  1235. if (In->getType() != Sext.getType())
  1236. In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);
  1237. return replaceInstUsesWith(Sext, In);
  1238. }
  1239. if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
  1240. // If we know that only one bit of the LHS of the icmp can be set and we
  1241. // have an equality comparison with zero or a power of 2, we can transform
  1242. // the icmp and sext into bitwise/integer operations.
  1243. if (Cmp->hasOneUse() &&
  1244. Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
  1245. KnownBits Known = computeKnownBits(Op0, 0, &Sext);
  1246. APInt KnownZeroMask(~Known.Zero);
  1247. if (KnownZeroMask.isPowerOf2()) {
  1248. Value *In = Cmp->getOperand(0);
  1249. // If the icmp tests for a known zero bit we can constant fold it.
  1250. if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
  1251. Value *V = Pred == ICmpInst::ICMP_NE ?
  1252. ConstantInt::getAllOnesValue(Sext.getType()) :
  1253. ConstantInt::getNullValue(Sext.getType());
  1254. return replaceInstUsesWith(Sext, V);
  1255. }
  1256. if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
  1257. // sext ((x & 2^n) == 0) -> (x >> n) - 1
  1258. // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
  1259. unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
  1260. // Perform a right shift to place the desired bit in the LSB.
  1261. if (ShiftAmt)
  1262. In = Builder.CreateLShr(In,
  1263. ConstantInt::get(In->getType(), ShiftAmt));
  1264. // At this point "In" is either 1 or 0. Subtract 1 to turn
  1265. // {1, 0} -> {0, -1}.
  1266. In = Builder.CreateAdd(In,
  1267. ConstantInt::getAllOnesValue(In->getType()),
  1268. "sext");
  1269. } else {
  1270. // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
  1271. // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
  1272. unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
  1273. // Perform a left shift to place the desired bit in the MSB.
  1274. if (ShiftAmt)
  1275. In = Builder.CreateShl(In,
  1276. ConstantInt::get(In->getType(), ShiftAmt));
  1277. // Distribute the bit over the whole bit width.
  1278. In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
  1279. KnownZeroMask.getBitWidth() - 1), "sext");
  1280. }
  1281. if (Sext.getType() == In->getType())
  1282. return replaceInstUsesWith(Sext, In);
  1283. return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
  1284. }
  1285. }
  1286. }
  1287. return nullptr;
  1288. }
  1289. /// Return true if we can take the specified value and return it as type Ty
  1290. /// without inserting any new casts and without changing the value of the common
  1291. /// low bits. This is used by code that tries to promote integer operations to
1292. /// a wider type when doing so will allow us to eliminate the extension.
  1293. ///
  1294. /// This function works on both vectors and scalars.
  1295. ///
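/// For example (illustrative IR; assumes promoting i8 math to i32 is
/// desirable), the 'add' below can be evaluated in i32 while preserving the
/// low 8 bits:
///   %a = add i8 %x, %y
///   %s = sext i8 %a to i32
/// becomes (with visitSExt supplying the final shl/ashr sign extension):
///   %xw = sext i8 %x to i32
///   %yw = sext i8 %y to i32
///   %aw = add i32 %xw, %yw
///   %sh = shl i32 %aw, 24
///   %s  = ashr i32 %sh, 24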
  1296. static bool canEvaluateSExtd(Value *V, Type *Ty) {
  1297. assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
  1298. "Can't sign extend type to a smaller type");
  1299. if (canAlwaysEvaluateInType(V, Ty))
  1300. return true;
  1301. if (canNotEvaluateInType(V, Ty))
  1302. return false;
  1303. auto *I = cast<Instruction>(V);
  1304. switch (I->getOpcode()) {
  1305. case Instruction::SExt: // sext(sext(x)) -> sext(x)
  1306. case Instruction::ZExt: // sext(zext(x)) -> zext(x)
  1307. case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
  1308. return true;
  1309. case Instruction::And:
  1310. case Instruction::Or:
  1311. case Instruction::Xor:
  1312. case Instruction::Add:
  1313. case Instruction::Sub:
  1314. case Instruction::Mul:
  1315. // These operators can all arbitrarily be extended if their inputs can.
  1316. return canEvaluateSExtd(I->getOperand(0), Ty) &&
  1317. canEvaluateSExtd(I->getOperand(1), Ty);
  1318. //case Instruction::Shl: TODO
  1319. //case Instruction::LShr: TODO
  1320. case Instruction::Select:
  1321. return canEvaluateSExtd(I->getOperand(1), Ty) &&
  1322. canEvaluateSExtd(I->getOperand(2), Ty);
  1323. case Instruction::PHI: {
  1324. // We can change a phi if we can change all operands. Note that we never
  1325. // get into trouble with cyclic PHIs here because we only consider
  1326. // instructions with a single use.
  1327. PHINode *PN = cast<PHINode>(I);
  1328. for (Value *IncValue : PN->incoming_values())
  1329. if (!canEvaluateSExtd(IncValue, Ty)) return false;
  1330. return true;
  1331. }
  1332. default:
  1333. // TODO: Can handle more cases here.
  1334. break;
  1335. }
  1336. return false;
  1337. }
  1338. Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
  1339. // If this sign extend is only used by a truncate, let the truncate be
  1340. // eliminated before we try to optimize this sext.
  1341. if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
  1342. return nullptr;
  1343. if (Instruction *I = commonCastTransforms(Sext))
  1344. return I;
  1345. Value *Src = Sext.getOperand(0);
  1346. Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
  1347. unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
  1348. unsigned DestBitSize = DestTy->getScalarSizeInBits();
  1349. // If the value being extended is zero or positive, use a zext instead.
  1350. if (isKnownNonNegative(Src, DL, 0, &AC, &Sext, &DT))
  1351. return CastInst::Create(Instruction::ZExt, Src, DestTy);
  1352. // Try to extend the entire expression tree to the wide destination type.
  1353. if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
  1354. // Okay, we can transform this! Insert the new expression now.
  1355. LLVM_DEBUG(
  1356. dbgs() << "ICE: EvaluateInDifferentType converting expression type"
  1357. " to avoid sign extend: "
  1358. << Sext << '\n');
  1359. Value *Res = EvaluateInDifferentType(Src, DestTy, true);
  1360. assert(Res->getType() == DestTy);
  1361. // If the high bits are already filled with sign bit, just replace this
  1362. // cast with the result.
  1363. if (ComputeNumSignBits(Res, 0, &Sext) > DestBitSize - SrcBitSize)
  1364. return replaceInstUsesWith(Sext, Res);
  1365. // We need to emit a shl + ashr to do the sign extend.
  1366. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
  1367. return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
  1368. ShAmt);
  1369. }
  1370. Value *X;
  1371. if (match(Src, m_Trunc(m_Value(X)))) {
  1372. // If the input has more sign bits than bits truncated, then convert
  1373. // directly to final type.
  1374. unsigned XBitSize = X->getType()->getScalarSizeInBits();
  1375. if (ComputeNumSignBits(X, 0, &Sext) > XBitSize - SrcBitSize)
  1376. return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
  1377. // If input is a trunc from the destination type, then convert into shifts.
  1378. if (Src->hasOneUse() && X->getType() == DestTy) {
  1379. // sext (trunc X) --> ashr (shl X, C), C
  1380. Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
  1381. return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
  1382. }
  1383. // If we are replacing shifted-in high zero bits with sign bits, convert
  1384. // the logic shift to arithmetic shift and eliminate the cast to
  1385. // intermediate type:
  1386. // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
  1387. Value *Y;
  1388. if (Src->hasOneUse() &&
  1389. match(X, m_LShr(m_Value(Y),
  1390. m_SpecificIntAllowUndef(XBitSize - SrcBitSize)))) {
  1391. Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
  1392. return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
  1393. }
  1394. }
  1395. if (auto *Cmp = dyn_cast<ICmpInst>(Src))
  1396. return transformSExtICmp(Cmp, Sext);
  1397. // If the input is a shl/ashr pair of a same constant, then this is a sign
  1398. // extension from a smaller value. If we could trust arbitrary bitwidth
  1399. // integers, we could turn this into a truncate to the smaller bit and then
  1400. // use a sext for the whole extension. Since we don't, look deeper and check
  1401. // for a truncate. If the source and dest are the same type, eliminate the
  1402. // trunc and extend and just do shifts. For example, turn:
  1403. // %a = trunc i32 %i to i8
  1404. // %b = shl i8 %a, C
  1405. // %c = ashr i8 %b, C
  1406. // %d = sext i8 %c to i32
  1407. // into:
  1408. // %a = shl i32 %i, 32-(8-C)
  1409. // %d = ashr i32 %a, 32-(8-C)
  1410. Value *A = nullptr;
  1411. // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
  1412. Constant *BA = nullptr, *CA = nullptr;
  1413. if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
  1414. m_Constant(CA))) &&
  1415. BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
  1416. Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy);
  1417. Constant *NumLowbitsLeft = ConstantExpr::getSub(
  1418. ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
  1419. Constant *NewShAmt = ConstantExpr::getSub(
  1420. ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
  1421. NumLowbitsLeft);
  1422. NewShAmt =
  1423. Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
  1424. A = Builder.CreateShl(A, NewShAmt, Sext.getName());
  1425. return BinaryOperator::CreateAShr(A, NewShAmt);
  1426. }
  1427. // Splatting a bit of constant-index across a value:
  1428. // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
  1429. // If the dest type is different, use a cast (adjust use check).
  1430. if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
  1431. m_SpecificInt(SrcBitSize - 1))))) {
  1432. Type *XTy = X->getType();
  1433. unsigned XBitSize = XTy->getScalarSizeInBits();
  1434. Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
  1435. Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
  1436. if (XTy == DestTy)
  1437. return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
  1438. AshrAmtC);
  1439. if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
  1440. Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
  1441. return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
  1442. }
  1443. }
  1444. if (match(Src, m_VScale(DL))) {
  1445. if (Sext.getFunction() &&
  1446. Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
  1447. Attribute Attr =
  1448. Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
  1449. if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
  1450. if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
  1451. Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
  1452. return replaceInstUsesWith(Sext, VScale);
  1453. }
  1454. }
  1455. }
  1456. }
  1457. return nullptr;
  1458. }
1459. /// Return true if the specified floating-point constant fits in the given FP
1460. /// semantics without changing its value.
  1461. static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
  1462. bool losesInfo;
  1463. APFloat F = CFP->getValueAPF();
  1464. (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
  1465. return !losesInfo;
  1466. }
  1467. static Type *shrinkFPConstant(ConstantFP *CFP) {
  1468. if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
  1469. return nullptr; // No constant folding of this.
  1470. // See if the value can be truncated to half and then reextended.
  1471. if (fitsInFPType(CFP, APFloat::IEEEhalf()))
  1472. return Type::getHalfTy(CFP->getContext());
  1473. // See if the value can be truncated to float and then reextended.
  1474. if (fitsInFPType(CFP, APFloat::IEEEsingle()))
  1475. return Type::getFloatTy(CFP->getContext());
  1476. if (CFP->getType()->isDoubleTy())
  1477. return nullptr; // Won't shrink.
  1478. if (fitsInFPType(CFP, APFloat::IEEEdouble()))
  1479. return Type::getDoubleTy(CFP->getContext());
  1480. // Don't try to shrink to various long double types.
  1481. return nullptr;
  1482. }
  1483. // Determine if this is a vector of ConstantFPs and if so, return the minimal
  1484. // type we can safely truncate all elements to.
  1485. static Type *shrinkFPConstantVector(Value *V) {
  1486. auto *CV = dyn_cast<Constant>(V);
  1487. auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
  1488. if (!CV || !CVVTy)
  1489. return nullptr;
  1490. Type *MinType = nullptr;
  1491. unsigned NumElts = CVVTy->getNumElements();
  1492. // For fixed-width vectors we find the minimal type by looking
  1493. // through the constant values of the vector.
  1494. for (unsigned i = 0; i != NumElts; ++i) {
  1495. if (isa<UndefValue>(CV->getAggregateElement(i)))
  1496. continue;
  1497. auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
  1498. if (!CFP)
  1499. return nullptr;
  1500. Type *T = shrinkFPConstant(CFP);
  1501. if (!T)
  1502. return nullptr;
  1503. // If we haven't found a type yet or this type has a larger mantissa than
  1504. // our previous type, this is our new minimal type.
  1505. if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
  1506. MinType = T;
  1507. }
  1508. // Make a vector type from the minimal type.
  1509. return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
  1510. }
  1511. /// Find the minimum FP type we can safely truncate to.
  1512. static Type *getMinimumFPType(Value *V) {
  1513. if (auto *FPExt = dyn_cast<FPExtInst>(V))
  1514. return FPExt->getOperand(0)->getType();
  1515. // If this value is a constant, return the constant in the smallest FP type
  1516. // that can accurately represent it. This allows us to turn
  1517. // (float)((double)X+2.0) into x+2.0f.
  1518. if (auto *CFP = dyn_cast<ConstantFP>(V))
  1519. if (Type *T = shrinkFPConstant(CFP))
  1520. return T;
  1521. // We can only correctly find a minimum type for a scalable vector when it is
  1522. // a splat. For splats of constant values the fpext is wrapped up as a
  1523. // ConstantExpr.
  1524. if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
  1525. if (FPCExt->getOpcode() == Instruction::FPExt)
  1526. return FPCExt->getOperand(0)->getType();
  1527. // Try to shrink a vector of FP constants. This returns nullptr on scalable
  1528. // vectors
  1529. if (Type *T = shrinkFPConstantVector(V))
  1530. return T;
  1531. return V->getType();
  1532. }
  1533. /// Return true if the cast from integer to FP can be proven to be exact for all
  1534. /// possible inputs (the conversion does not lose any precision).
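/// For example (illustrative): "uitofp i16 %x to float" is always exact
/// because float carries 24 significand bits, while "uitofp i64 %x to float"
/// is not, since values such as 2^60 + 1 cannot be represented exactly.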
  1535. static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
  1536. CastInst::CastOps Opcode = I.getOpcode();
  1537. assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
  1538. "Unexpected cast");
  1539. Value *Src = I.getOperand(0);
  1540. Type *SrcTy = Src->getType();
  1541. Type *FPTy = I.getType();
  1542. bool IsSigned = Opcode == Instruction::SIToFP;
  1543. int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
  1544. // Easy case - if the source integer type has less bits than the FP mantissa,
  1545. // then the cast must be exact.
  1546. int DestNumSigBits = FPTy->getFPMantissaWidth();
  1547. if (SrcSize <= DestNumSigBits)
  1548. return true;
  1549. // Cast from FP to integer and back to FP is independent of the intermediate
  1550. // integer width because of poison on overflow.
  1551. Value *F;
  1552. if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
  1553. // If this is uitofp (fptosi F), the source needs an extra bit to avoid
  1554. // potential rounding of negative FP input values.
  1555. int SrcNumSigBits = F->getType()->getFPMantissaWidth();
  1556. if (!IsSigned && match(Src, m_FPToSI(m_Value())))
  1557. SrcNumSigBits++;
  1558. // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
  1559. // significant bits than the destination (and make sure neither type is
  1560. // weird -- ppc_fp128).
  1561. if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
  1562. SrcNumSigBits <= DestNumSigBits)
  1563. return true;
  1564. }
  1565. // TODO:
  1566. // Try harder to find if the source integer type has less significant bits.
  1567. // For example, compute number of sign bits.
  1568. KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
  1569. int SigBits = (int)SrcTy->getScalarSizeInBits() -
  1570. SrcKnown.countMinLeadingZeros() -
  1571. SrcKnown.countMinTrailingZeros();
  1572. if (SigBits <= DestNumSigBits)
  1573. return true;
  1574. return false;
  1575. }
  1576. Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
  1577. if (Instruction *I = commonCastTransforms(FPT))
  1578. return I;
  1579. // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
  1580. // simplify this expression to avoid one or more of the trunc/extend
  1581. // operations if we can do so without changing the numerical results.
  1582. //
  1583. // The exact manner in which the widths of the operands interact to limit
  1584. // what we can and cannot do safely varies from operation to operation, and
  1585. // is explained below in the various case statements.
  1586. Type *Ty = FPT.getType();
  1587. auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
  1588. if (BO && BO->hasOneUse()) {
  1589. Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
  1590. Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
  1591. unsigned OpWidth = BO->getType()->getFPMantissaWidth();
  1592. unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
  1593. unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
  1594. unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
  1595. unsigned DstWidth = Ty->getFPMantissaWidth();
  1596. switch (BO->getOpcode()) {
  1597. default: break;
  1598. case Instruction::FAdd:
  1599. case Instruction::FSub:
  1600. // For addition and subtraction, the infinitely precise result can
  1601. // essentially be arbitrarily wide; proving that double rounding
  1602. // will not occur because the result of OpI is exact (as we will for
  1603. // FMul, for example) is hopeless. However, we *can* nonetheless
  1604. // frequently know that double rounding cannot occur (or that it is
  1605. // innocuous) by taking advantage of the specific structure of
  1606. // infinitely-precise results that admit double rounding.
  1607. //
1608. // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
  1609. // to represent both sources, we can guarantee that the double
  1610. // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
  1611. // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
  1612. // for proof of this fact).
  1613. //
  1614. // Note: Figueroa does not consider the case where DstFormat !=
  1615. // SrcFormat. It's possible (likely even!) that this analysis
  1616. // could be tightened for those cases, but they are rare (the main
  1617. // case of interest here is (float)((double)float + float)).
  1618. if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
  1619. Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
  1620. Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
  1621. Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
  1622. RI->copyFastMathFlags(BO);
  1623. return RI;
  1624. }
  1625. break;
  1626. case Instruction::FMul:
  1627. // For multiplication, the infinitely precise result has at most
  1628. // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
  1629. // that such a value can be exactly represented, then no double
  1630. // rounding can possibly occur; we can safely perform the operation
  1631. // in the destination format if it can represent both sources.
  1632. if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
  1633. Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
  1634. Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
  1635. return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
  1636. }
  1637. break;
  1638. case Instruction::FDiv:
1639. // For division, we again use the bound from Figueroa's
  1640. // dissertation. I am entirely certain that this bound can be
  1641. // tightened in the unbalanced operand case by an analysis based on
  1642. // the diophantine rational approximation bound, but the well-known
  1643. // condition used here is a good conservative first pass.
  1644. // TODO: Tighten bound via rigorous analysis of the unbalanced case.
  1645. if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
  1646. Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
  1647. Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
  1648. return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
  1649. }
  1650. break;
  1651. case Instruction::FRem: {
  1652. // Remainder is straightforward. Remainder is always exact, so the
  1653. // type of OpI doesn't enter into things at all. We simply evaluate
  1654. // in whichever source type is larger, then convert to the
  1655. // destination type.
  1656. if (SrcWidth == OpWidth)
  1657. break;
  1658. Value *LHS, *RHS;
  1659. if (LHSWidth == SrcWidth) {
  1660. LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
  1661. RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
  1662. } else {
  1663. LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
  1664. RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
  1665. }
  1666. Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
  1667. return CastInst::CreateFPCast(ExactResult, Ty);
  1668. }
  1669. }
  1670. }
  1671. // (fptrunc (fneg x)) -> (fneg (fptrunc x))
  1672. Value *X;
  1673. Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
  1674. if (Op && Op->hasOneUse()) {
  1675. // FIXME: The FMF should propagate from the fptrunc, not the source op.
  1676. IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  1677. if (isa<FPMathOperator>(Op))
  1678. Builder.setFastMathFlags(Op->getFastMathFlags());
  1679. if (match(Op, m_FNeg(m_Value(X)))) {
  1680. Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
  1681. return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
  1682. }
  1683. // If we are truncating a select that has an extended operand, we can
  1684. // narrow the other operand and do the select as a narrow op.
  1685. Value *Cond, *X, *Y;
  1686. if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
  1687. X->getType() == Ty) {
1688. // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
  1689. Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
  1690. Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
  1691. return replaceInstUsesWith(FPT, Sel);
  1692. }
  1693. if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
  1694. X->getType() == Ty) {
1695. // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
  1696. Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
  1697. Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
  1698. return replaceInstUsesWith(FPT, Sel);
  1699. }
  1700. }
  1701. if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
  1702. switch (II->getIntrinsicID()) {
  1703. default: break;
  1704. case Intrinsic::ceil:
  1705. case Intrinsic::fabs:
  1706. case Intrinsic::floor:
  1707. case Intrinsic::nearbyint:
  1708. case Intrinsic::rint:
  1709. case Intrinsic::round:
  1710. case Intrinsic::roundeven:
  1711. case Intrinsic::trunc: {
  1712. Value *Src = II->getArgOperand(0);
  1713. if (!Src->hasOneUse())
  1714. break;
  1715. // Except for fabs, this transformation requires the input of the unary FP
  1716. // operation to be itself an fpext from the type to which we're
  1717. // truncating.
  1718. if (II->getIntrinsicID() != Intrinsic::fabs) {
  1719. FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
  1720. if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
  1721. break;
  1722. }
  1723. // Do unary FP operation on smaller type.
  1724. // (fptrunc (fabs x)) -> (fabs (fptrunc x))
  1725. Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
  1726. Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
  1727. II->getIntrinsicID(), Ty);
  1728. SmallVector<OperandBundleDef, 1> OpBundles;
  1729. II->getOperandBundlesAsDefs(OpBundles);
  1730. CallInst *NewCI =
  1731. CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
  1732. NewCI->copyFastMathFlags(II);
  1733. return NewCI;
  1734. }
  1735. }
  1736. }
  1737. if (Instruction *I = shrinkInsertElt(FPT, Builder))
  1738. return I;
  1739. Value *Src = FPT.getOperand(0);
  1740. if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
  1741. auto *FPCast = cast<CastInst>(Src);
  1742. if (isKnownExactCastIntToFP(*FPCast, *this))
  1743. return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
  1744. }
  1745. return nullptr;
  1746. }
  1747. Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
  1748. // If the source operand is a cast from integer to FP and known exact, then
  1749. // cast the integer operand directly to the destination type.
  1750. Type *Ty = FPExt.getType();
  1751. Value *Src = FPExt.getOperand(0);
  1752. if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
  1753. auto *FPCast = cast<CastInst>(Src);
  1754. if (isKnownExactCastIntToFP(*FPCast, *this))
  1755. return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
  1756. }
  1757. return commonCastTransforms(FPExt);
  1758. }
  1759. /// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
  1760. /// This is safe if the intermediate type has enough bits in its mantissa to
  1761. /// accurately represent all values of X. For example, this won't work with
  1762. /// i64 -> float -> i64.
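/// For example (illustrative IR; value names are made up):
///   %f = sitofp i16 %x to float
///   %i = fptosi float %f to i32
/// folds to
///   %i = sext i16 %x to i32
/// because every i16 value is exactly representable in float.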
  1763. Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
  1764. if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
  1765. return nullptr;
  1766. auto *OpI = cast<CastInst>(FI.getOperand(0));
  1767. Value *X = OpI->getOperand(0);
  1768. Type *XType = X->getType();
  1769. Type *DestType = FI.getType();
  1770. bool IsOutputSigned = isa<FPToSIInst>(FI);
  1771. // Since we can assume the conversion won't overflow, our decision as to
  1772. // whether the input will fit in the float should depend on the minimum
  1773. // of the input range and output range.
  1774. // This means this is also safe for a signed input and unsigned output, since
  1775. // a negative input would lead to undefined behavior.
  1776. if (!isKnownExactCastIntToFP(*OpI, *this)) {
  1777. // The first cast may not round exactly based on the source integer width
  1778. // and FP width, but the overflow UB rules can still allow this to fold.
  1779. // If the destination type is narrow, that means the intermediate FP value
  1780. // must be large enough to hold the source value exactly.
1781. // For example, (uint8_t)(float)(uint32_t)16777217 is undefined behavior.
  1782. int OutputSize = (int)DestType->getScalarSizeInBits();
  1783. if (OutputSize > OpI->getType()->getFPMantissaWidth())
  1784. return nullptr;
  1785. }
  1786. if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
  1787. bool IsInputSigned = isa<SIToFPInst>(OpI);
  1788. if (IsInputSigned && IsOutputSigned)
  1789. return new SExtInst(X, DestType);
  1790. return new ZExtInst(X, DestType);
  1791. }
  1792. if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
  1793. return new TruncInst(X, DestType);
  1794. assert(XType == DestType && "Unexpected types for int to FP to int casts");
  1795. return replaceInstUsesWith(FI, X);
  1796. }
  1797. Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
  1798. if (Instruction *I = foldItoFPtoI(FI))
  1799. return I;
  1800. return commonCastTransforms(FI);
  1801. }
  1802. Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
  1803. if (Instruction *I = foldItoFPtoI(FI))
  1804. return I;
  1805. return commonCastTransforms(FI);
  1806. }
  1807. Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
  1808. return commonCastTransforms(CI);
  1809. }
  1810. Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
  1811. return commonCastTransforms(CI);
  1812. }
  1813. Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
  1814. // If the source integer type is not the intptr_t type for this target, do a
  1815. // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
  1816. // cast to be exposed to other transforms.
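// For example, on a target with 64-bit pointers (illustrative IR):
//   %p = inttoptr i32 %x to ptr
// becomes
//   %w = zext i32 %x to i64
//   %p = inttoptr i64 %w to ptr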
  1817. unsigned AS = CI.getAddressSpace();
  1818. if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
  1819. DL.getPointerSizeInBits(AS)) {
  1820. Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
  1821. DL.getIntPtrType(CI.getContext(), AS));
  1822. Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
  1823. return new IntToPtrInst(P, CI.getType());
  1824. }
  1825. if (Instruction *I = commonCastTransforms(CI))
  1826. return I;
  1827. return nullptr;
  1828. }
  1829. /// Implement the transforms for cast of pointer (bitcast/ptrtoint)
  1830. Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) {
  1831. Value *Src = CI.getOperand(0);
  1832. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
  1833. // If casting the result of a getelementptr instruction with no offset, turn
  1834. // this into a cast of the original pointer!
  1835. if (GEP->hasAllZeroIndices() &&
1836. // If CI is an addrspacecast and GEP changes the pointer type, merging
  1837. // GEP into CI would undo canonicalizing addrspacecast with different
  1838. // pointer types, causing infinite loops.
  1839. (!isa<AddrSpaceCastInst>(CI) ||
  1840. GEP->getType() == GEP->getPointerOperandType())) {
  1841. // Changing the cast operand is usually not a good idea but it is safe
  1842. // here because the pointer operand is being replaced with another
  1843. // pointer operand so the opcode doesn't need to change.
  1844. return replaceOperand(CI, 0, GEP->getOperand(0));
  1845. }
  1846. }
  1847. return commonCastTransforms(CI);
  1848. }
  1849. Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
  1850. // If the destination integer type is not the intptr_t type for this target,
  1851. // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
  1852. // to be exposed to other transforms.
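// For example, on a target with 64-bit pointers (illustrative IR):
//   %i = ptrtoint ptr %p to i32
// becomes
//   %w = ptrtoint ptr %p to i64
//   %i = trunc i64 %w to i32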
  1853. Value *SrcOp = CI.getPointerOperand();
  1854. Type *SrcTy = SrcOp->getType();
  1855. Type *Ty = CI.getType();
  1856. unsigned AS = CI.getPointerAddressSpace();
  1857. unsigned TySize = Ty->getScalarSizeInBits();
  1858. unsigned PtrSize = DL.getPointerSizeInBits(AS);
  1859. if (TySize != PtrSize) {
  1860. Type *IntPtrTy =
  1861. SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
  1862. Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
  1863. return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
  1864. }
  1865. if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
  1866. // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
  1867. // While this can increase the number of instructions it doesn't actually
  1868. // increase the overall complexity since the arithmetic is just part of
  1869. // the GEP otherwise.
  1870. if (GEP->hasOneUse() &&
  1871. isa<ConstantPointerNull>(GEP->getPointerOperand())) {
  1872. return replaceInstUsesWith(CI,
  1873. Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
  1874. /*isSigned=*/false));
  1875. }
  1876. }
  1877. Value *Vec, *Scalar, *Index;
  1878. if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
  1879. m_Value(Scalar), m_Value(Index)))) &&
  1880. Vec->getType() == Ty) {
  1881. assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
  1882. // Convert the scalar to int followed by insert to eliminate one cast:
1883. // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
  1884. Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
  1885. return InsertElementInst::Create(Vec, NewCast, Index);
  1886. }
  1887. return commonPointerCastTransforms(CI);
  1888. }
  1889. /// This input value (which is known to have vector type) is being zero extended
  1890. /// or truncated to the specified vector type. Since the zext/trunc is done
  1891. /// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
  1892. /// endianness will impact which end of the vector that is extended or
  1893. /// truncated.
  1894. ///
  1895. /// A vector is always stored with index 0 at the lowest address, which
  1896. /// corresponds to the most significant bits for a big endian stored integer and
  1897. /// the least significant bits for little endian. A trunc/zext of an integer
  1898. /// impacts the big end of the integer. Thus, we need to add/remove elements at
  1899. /// the front of the vector for big endian targets, and the back of the vector
  1900. /// for little endian targets.
  1901. ///
  1902. /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
  1903. ///
  1904. /// The source and destination vector types may have different element types.
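/// For example, on a little-endian target (illustrative IR):
///   %i = bitcast <2 x i32> %v to i64
///   %z = zext i64 %i to i128
///   %r = bitcast i128 %z to <4 x i32>
/// can be rewritten as
///   %r = shufflevector <2 x i32> %v, <2 x i32> zeroinitializer,
///                      <4 x i32> <i32 0, i32 1, i32 2, i32 2>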
  1905. static Instruction *
  1906. optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
  1907. InstCombinerImpl &IC) {
  1908. // We can only do this optimization if the output is a multiple of the input
  1909. // element size, or the input is a multiple of the output element size.
  1910. // Convert the input type to have the same element type as the output.
  1911. VectorType *SrcTy = cast<VectorType>(InVal->getType());
  1912. if (SrcTy->getElementType() != DestTy->getElementType()) {
  1913. // The input types don't need to be identical, but for now they must be the
  1914. // same size. There is no specific reason we couldn't handle things like
  1915. // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
  1916. // there yet.
  1917. if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
  1918. DestTy->getElementType()->getPrimitiveSizeInBits())
  1919. return nullptr;
  1920. SrcTy =
  1921. FixedVectorType::get(DestTy->getElementType(),
  1922. cast<FixedVectorType>(SrcTy)->getNumElements());
  1923. InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
  1924. }
  1925. bool IsBigEndian = IC.getDataLayout().isBigEndian();
  1926. unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
  1927. unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
  1928. assert(SrcElts != DestElts && "Element counts should be different.");
  1929. // Now that the element types match, get the shuffle mask and RHS of the
  1930. // shuffle to use, which depends on whether we're increasing or decreasing the
  1931. // size of the input.
  1932. auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
  1933. ArrayRef<int> ShuffleMask;
  1934. Value *V2;
  1935. if (SrcElts > DestElts) {
  1936. // If we're shrinking the number of elements (rewriting an integer
  1937. // truncate), just shuffle in the elements corresponding to the least
  1938. // significant bits from the input and use poison as the second shuffle
  1939. // input.
  1940. V2 = PoisonValue::get(SrcTy);
  1941. // Make sure the shuffle mask selects the "least significant bits" by
1942. // keeping elements from the back of the src vector for big endian, and from the
  1943. // front for little endian.
  1944. ShuffleMask = ShuffleMaskStorage;
  1945. if (IsBigEndian)
  1946. ShuffleMask = ShuffleMask.take_back(DestElts);
  1947. else
  1948. ShuffleMask = ShuffleMask.take_front(DestElts);
  1949. } else {
  1950. // If we're increasing the number of elements (rewriting an integer zext),
  1951. // shuffle in all of the elements from InVal. Fill the rest of the result
  1952. // elements with zeros from a constant zero.
  1953. V2 = Constant::getNullValue(SrcTy);
  1954. // Use first elt from V2 when indicating zero in the shuffle mask.
  1955. uint32_t NullElt = SrcElts;
  1956. // Extend with null values in the "most significant bits" by adding elements
  1957. // in front of the src vector for big endian, and at the back for little
  1958. // endian.
  1959. unsigned DeltaElts = DestElts - SrcElts;
  1960. if (IsBigEndian)
  1961. ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
  1962. else
  1963. ShuffleMaskStorage.append(DeltaElts, NullElt);
  1964. ShuffleMask = ShuffleMaskStorage;
  1965. }
  1966. return new ShuffleVectorInst(InVal, V2, ShuffleMask);
  1967. }
  1968. static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
  1969. return Value % Ty->getPrimitiveSizeInBits() == 0;
  1970. }
  1971. static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
  1972. return Value / Ty->getPrimitiveSizeInBits();
  1973. }
  1974. /// V is a value which is inserted into a vector of VecEltTy.
  1975. /// Look through the value to see if we can decompose it into
  1976. /// insertions into the vector. See the example in the comment for
  1977. /// OptimizeIntegerToVectorInsertions for the pattern this handles.
  1978. /// The type of V is always a non-zero multiple of VecEltTy's size.
  1979. /// Shift is the number of bits between the lsb of V and the lsb of
  1980. /// the vector.
  1981. ///
  1982. /// This returns false if the pattern can't be matched or true if it can,
  1983. /// filling in Elements with the elements found here.
  1984. static bool collectInsertionElements(Value *V, unsigned Shift,
  1985. SmallVectorImpl<Value *> &Elements,
  1986. Type *VecEltTy, bool isBigEndian) {
  1987. assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
  1988. "Shift should be a multiple of the element type size");
  1989. // Undef values never contribute useful bits to the result.
  1990. if (isa<UndefValue>(V)) return true;
1991. // If we got down to a value of the right type, we win: try inserting it into
1992. // the right element.
  1993. if (V->getType() == VecEltTy) {
  1994. // Inserting null doesn't actually insert any elements.
  1995. if (Constant *C = dyn_cast<Constant>(V))
  1996. if (C->isNullValue())
  1997. return true;
  1998. unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
  1999. if (isBigEndian)
  2000. ElementIndex = Elements.size() - ElementIndex - 1;
  2001. // Fail if multiple elements are inserted into this slot.
  2002. if (Elements[ElementIndex])
  2003. return false;
  2004. Elements[ElementIndex] = V;
  2005. return true;
  2006. }
  2007. if (Constant *C = dyn_cast<Constant>(V)) {
  2008. // Figure out the # elements this provides, and bitcast it or slice it up
  2009. // as required.
  2010. unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
  2011. VecEltTy);
  2012. // If the constant is the size of a vector element, we just need to bitcast
  2013. // it to the right type so it gets properly inserted.
  2014. if (NumElts == 1)
  2015. return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
  2016. Shift, Elements, VecEltTy, isBigEndian);
  2017. // Okay, this is a constant that covers multiple elements. Slice it up into
  2018. // pieces and insert each element-sized piece into the vector.
  2019. if (!isa<IntegerType>(C->getType()))
  2020. C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
  2021. C->getType()->getPrimitiveSizeInBits()));
  2022. unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
  2023. Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
  2024. for (unsigned i = 0; i != NumElts; ++i) {
  2025. unsigned ShiftI = Shift+i*ElementSize;
  2026. Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
  2027. ShiftI));
  2028. Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
  2029. if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
  2030. isBigEndian))
  2031. return false;
  2032. }
  2033. return true;
  2034. }
  2035. if (!V->hasOneUse()) return false;
  2036. Instruction *I = dyn_cast<Instruction>(V);
  2037. if (!I) return false;
  2038. switch (I->getOpcode()) {
  2039. default: return false; // Unhandled case.
  2040. case Instruction::BitCast:
  2041. if (I->getOperand(0)->getType()->isVectorTy())
  2042. return false;
  2043. return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
  2044. isBigEndian);
  2045. case Instruction::ZExt:
  2046. if (!isMultipleOfTypeSize(
  2047. I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
  2048. VecEltTy))
  2049. return false;
  2050. return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
  2051. isBigEndian);
  2052. case Instruction::Or:
  2053. return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
  2054. isBigEndian) &&
  2055. collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
  2056. isBigEndian);
  2057. case Instruction::Shl: {
  2058. // Must be shifting by a constant that is a multiple of the element size.
  2059. ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
  2060. if (!CI) return false;
  2061. Shift += CI->getZExtValue();
  2062. if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
  2063. return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
  2064. isBigEndian);
  2065. }
  2066. }
  2067. }
  2068. /// If the input is an 'or' instruction, we may be doing shifts and ors to
  2069. /// assemble the elements of the vector manually.
  2070. /// Try to rip the code out and replace it with insertelements. This is to
  2071. /// optimize code like this:
  2072. ///
  2073. /// %tmp37 = bitcast float %inc to i32
  2074. /// %tmp38 = zext i32 %tmp37 to i64
  2075. /// %tmp31 = bitcast float %inc5 to i32
  2076. /// %tmp32 = zext i32 %tmp31 to i64
  2077. /// %tmp33 = shl i64 %tmp32, 32
  2078. /// %ins35 = or i64 %tmp33, %tmp38
  2079. /// %tmp43 = bitcast i64 %ins35 to <2 x float>
  2080. ///
  2081. /// Into two insertelements that do "buildvector{%inc, %inc5}".
  2082. static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
  2083. InstCombinerImpl &IC) {
  2084. auto *DestVecTy = cast<FixedVectorType>(CI.getType());
  2085. Value *IntInput = CI.getOperand(0);
  2086. SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
  2087. if (!collectInsertionElements(IntInput, 0, Elements,
  2088. DestVecTy->getElementType(),
  2089. IC.getDataLayout().isBigEndian()))
  2090. return nullptr;
2091. // If we succeeded, we know that all of the elements are specified by Elements
  2092. // or are zero if Elements has a null entry. Recast this as a set of
  2093. // insertions.
  2094. Value *Result = Constant::getNullValue(CI.getType());
  2095. for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
  2096. if (!Elements[i]) continue; // Unset element.
  2097. Result = IC.Builder.CreateInsertElement(Result, Elements[i],
  2098. IC.Builder.getInt32(i));
  2099. }
  2100. return Result;
  2101. }
  2102. /// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
  2103. /// vector followed by extract element. The backend tends to handle bitcasts of
  2104. /// vectors better than bitcasts of scalars because vector registers are
  2105. /// usually not type-specific like scalar integer or scalar floating-point.
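/// For example (illustrative IR; value names are made up):
///   %e = extractelement <4 x i32> %v, i32 %idx
///   %f = bitcast i32 %e to float
/// becomes
///   %bc = bitcast <4 x i32> %v to <4 x float>
///   %f  = extractelement <4 x float> %bc, i32 %idx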
static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
                                              InstCombinerImpl &IC) {
  Value *VecOp, *Index;
  if (!match(BitCast.getOperand(0),
             m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
    return nullptr;

  // The bitcast must be to a vectorizable type, otherwise we can't make a new
  // type to extract from.
  Type *DestType = BitCast.getType();
  VectorType *VecType = cast<VectorType>(VecOp->getType());
  if (VectorType::isValidElementType(DestType)) {
    auto *NewVecType = VectorType::get(DestType, VecType);
    auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
    return ExtractElementInst::Create(NewBC, Index);
  }

  // Only handle the case where DestType is a vector, to avoid the inverse
  // transform in visitBitCast.
  // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
  auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
  if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
    return CastInst::Create(Instruction::BitCast, VecOp, DestType);

  return nullptr;
}

/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
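///
/// Illustrative sketch (value names invented for exposition):
///
///    %bc  = bitcast <2 x i32> %x to <4 x i16>
///    %and = and <4 x i16> %bc, %y
///    %res = bitcast <4 x i16> %and to <2 x i32>
///
/// can become:
///
///    %yc  = bitcast <4 x i16> %y to <2 x i32>
///    %res = and <2 x i32> %x, %yc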
static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
                                            InstCombiner::BuilderTy &Builder) {
  Type *DestTy = BitCast.getType();
  BinaryOperator *BO;

  if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
      !BO->isBitwiseLogicOp())
    return nullptr;

  // FIXME: This transform is restricted to vector types to avoid backend
  // problems caused by creating potentially illegal operations. If a fix-up is
  // added to handle that situation, we can remove this check.
  if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
    return nullptr;

  if (DestTy->isFPOrFPVectorTy()) {
    Value *X, *Y;
    // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
    if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
        match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
      if (X->getType()->isFPOrFPVectorTy() &&
          Y->getType()->isIntOrIntVectorTy()) {
        Value *CastedOp =
            Builder.CreateBitCast(BO->getOperand(0), Y->getType());
        Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
        return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
      }
      if (X->getType()->isIntOrIntVectorTy() &&
          Y->getType()->isFPOrFPVectorTy()) {
        Value *CastedOp =
            Builder.CreateBitCast(BO->getOperand(1), X->getType());
        Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
        return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
      }
    }
    return nullptr;
  }

  if (!DestTy->isIntOrIntVectorTy())
    return nullptr;

  Value *X;
  if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType() == DestTy && !isa<Constant>(X)) {
    // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
    Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
    return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
  }

  if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType() == DestTy && !isa<Constant>(X)) {
    // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
    Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
    return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
  }

  // Canonicalize vector bitcasts to come before vector bitwise logic with a
  // constant. This eases recognition of special constants for later ops.
  // Example:
  // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
  Constant *C;
  if (match(BO->getOperand(1), m_Constant(C))) {
    // bitcast (logic X, C) --> logic (bitcast X, C')
    Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
    Value *CastedC = Builder.CreateBitCast(C, DestTy);
    return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
  }

  return nullptr;
}

/// Change the type of a select if we can eliminate a bitcast.
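///
/// Illustrative sketch (value names invented for exposition):
///
///    %tc  = bitcast <4 x i32> %x to <2 x i64>
///    %sel = select i1 %c, <2 x i64> %tc, <2 x i64> %y
///    %res = bitcast <2 x i64> %sel to <4 x i32>
///
/// can become:
///
///    %yc  = bitcast <2 x i64> %y to <4 x i32>
///    %res = select i1 %c, <4 x i32> %x, <4 x i32> %yc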
static Instruction *foldBitCastSelect(BitCastInst &BitCast,
                                      InstCombiner::BuilderTy &Builder) {
  Value *Cond, *TVal, *FVal;
  if (!match(BitCast.getOperand(0),
             m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
    return nullptr;

  // A vector select must maintain the same number of elements in its operands.
  Type *CondTy = Cond->getType();
  Type *DestTy = BitCast.getType();
  if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
    if (!DestTy->isVectorTy() ||
        CondVTy->getElementCount() !=
            cast<VectorType>(DestTy)->getElementCount())
      return nullptr;

  // FIXME: This transform is restricted from changing the select between
  // scalars and vectors to avoid backend problems caused by creating
  // potentially illegal operations. If a fix-up is added to handle that
  // situation, we can remove this check.
  if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
    return nullptr;

  auto *Sel = cast<Instruction>(BitCast.getOperand(0));
  Value *X;
  if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
      !isa<Constant>(X)) {
    // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
    Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
    return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
  }

  if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
      !isa<Constant>(X)) {
    // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
    Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
    return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
  }

  return nullptr;
}

/// Check if all users of CI are StoreInsts.
static bool hasStoreUsersOnly(CastInst &CI) {
  for (User *U : CI.users()) {
    if (!isa<StoreInst>(U))
      return false;
  }
  return true;
}

/// This function handles the following case:
///
///     A  ->  B    cast
///     PHI
///     B  ->  A    cast
///
/// All the related PHI nodes can be replaced by new PHI nodes with type A.
/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
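///
/// Illustrative IR (value names invented, with A = <2 x float> and B = i64):
///
///     %b   = bitcast <2 x float> %a to i64
///     ...
///     %phi = phi i64 [ %b, %BB1 ], [ %b2, %BB2 ]
///     %r   = bitcast i64 %phi to <2 x float>
///
/// The PHI can instead be built over <2 x float> values, making both bitcasts
/// redundant.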
Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
                                                      PHINode *PN) {
  // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
  if (hasStoreUsersOnly(CI))
    return nullptr;

  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType(); // Type B
  Type *DestTy = CI.getType();  // Type A

  SmallVector<PHINode *, 4> PhiWorklist;
  SmallSetVector<PHINode *, 4> OldPhiNodes;

  // Find all of the A->B casts and PHI nodes.
  // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
  // OldPhiNodes is used to track all known PHI nodes; before adding a new
  // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
  PhiWorklist.push_back(PN);
  OldPhiNodes.insert(PN);
  while (!PhiWorklist.empty()) {
    auto *OldPN = PhiWorklist.pop_back_val();
    for (Value *IncValue : OldPN->incoming_values()) {
      if (isa<Constant>(IncValue))
        continue;

      if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
        // In a chain of loads where each loaded value is used as the address
        // of a later load, the bitcast is necessary to change the value type,
        // so don't optimize it. For simplicity, give up whenever the load
        // address comes from another load.
        Value *Addr = LI->getOperand(0);
        if (Addr == &CI || isa<LoadInst>(Addr))
          return nullptr;

        // Don't transform "load <256 x i32>, <256 x i32>*" to
        // "load x86_amx, x86_amx*", because x86_amx* is invalid.
        // TODO: Remove this check when bitcast between vector and x86_amx
        // is replaced with a specific intrinsic.
        if (DestTy->isX86_AMXTy())
          return nullptr;

        if (LI->hasOneUse() && LI->isSimple())
          continue;

        // If a LoadInst has more than one use, changing the type of loaded
        // value may create another bitcast.
        return nullptr;
      }

      if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
        if (OldPhiNodes.insert(PNode))
          PhiWorklist.push_back(PNode);
        continue;
      }

      auto *BCI = dyn_cast<BitCastInst>(IncValue);
      // We can't handle other instructions.
      if (!BCI)
        return nullptr;

      // Verify it's an A->B cast.
      Type *TyA = BCI->getOperand(0)->getType();
      Type *TyB = BCI->getType();
      if (TyA != DestTy || TyB != SrcTy)
        return nullptr;
    }
  }

  // Check that each user of each old PHI node is something that we can
  // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
  for (auto *OldPN : OldPhiNodes) {
    for (User *V : OldPN->users()) {
      if (auto *SI = dyn_cast<StoreInst>(V)) {
        if (!SI->isSimple() || SI->getOperand(0) != OldPN)
          return nullptr;
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        // Verify it's a B->A cast.
        Type *TyB = BCI->getOperand(0)->getType();
        Type *TyA = BCI->getType();
        if (TyA != DestTy || TyB != SrcTy)
          return nullptr;
      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
        // As long as the user is another old PHI node, then even if we don't
        // rewrite it, the PHI web we're considering won't have any users
        // outside itself, so it'll be dead.
        if (!OldPhiNodes.contains(PHI))
          return nullptr;
      } else {
        return nullptr;
      }
    }
  }

  // For each old PHI node, create a corresponding new PHI node with type A.
  SmallDenseMap<PHINode *, PHINode *> NewPNodes;
  for (auto *OldPN : OldPhiNodes) {
    Builder.SetInsertPoint(OldPN);
    PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
    NewPNodes[OldPN] = NewPN;
  }

  // Fill in the operands of new PHI nodes.
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
      Value *V = OldPN->getOperand(j);
      Value *NewV = nullptr;
      if (auto *C = dyn_cast<Constant>(V)) {
        NewV = ConstantExpr::getBitCast(C, DestTy);
      } else if (auto *LI = dyn_cast<LoadInst>(V)) {
        // Explicitly perform load combine to make sure no opposing transform
        // can remove the bitcast in the meantime and trigger an infinite loop.
        Builder.SetInsertPoint(LI);
        NewV = combineLoadToNewType(*LI, DestTy);
        // Remove the old load and its use in the old phi, which itself becomes
        // dead once the whole transform finishes.
        replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
        eraseInstFromFunction(*LI);
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        NewV = BCI->getOperand(0);
      } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
        NewV = NewPNodes[PrevPN];
      }
      assert(NewV);
      NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
    }
  }

  // Traverse all accumulated PHI nodes and process their users, which are
  // Stores and BitCasts. Without this processing, NewPHI nodes could be
  // replicated and could lead to extra moves generated after DeSSA.
  // If there is a store with type B, change it to type A.
  // Replace users of BitCast B->A with NewPHI. This will help later
  // to get rid of a closure formed by OldPHI nodes.
  Instruction *RetVal = nullptr;
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (User *V : make_early_inc_range(OldPN->users())) {
      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(SI->isSimple() && SI->getOperand(0) == OldPN);
        Builder.SetInsertPoint(SI);
        auto *NewBC =
            cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
        SI->setOperand(0, NewBC);
        Worklist.push(SI);
        assert(hasStoreUsersOnly(*NewBC));
      }
      else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        Type *TyB = BCI->getOperand(0)->getType();
        Type *TyA = BCI->getType();
        assert(TyA == DestTy && TyB == SrcTy);
        (void) TyA;
        (void) TyB;
        Instruction *I = replaceInstUsesWith(*BCI, NewPN);
        if (BCI == &CI)
          RetVal = I;
      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
        assert(OldPhiNodes.contains(PHI));
        (void) PHI;
      } else {
        llvm_unreachable("all uses should be handled");
      }
    }
  }

  return RetVal;
}

static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
                                        const DataLayout &DL) {
  Value *Src = CI.getOperand(0);
  PointerType *SrcPTy = cast<PointerType>(Src->getType());
  PointerType *DstPTy = cast<PointerType>(CI.getType());

  // Bitcasts involving opaque pointers cannot be converted into a GEP.
  if (SrcPTy->isOpaque() || DstPTy->isOpaque())
    return nullptr;

  Type *DstElTy = DstPTy->getNonOpaquePointerElementType();
  Type *SrcElTy = SrcPTy->getNonOpaquePointerElementType();

  // When the type pointed to is not sized the cast cannot be
  // turned into a gep.
  if (!SrcElTy->isSized())
    return nullptr;

  // If the source and destination are pointers, and this cast is equivalent
  // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
  // This can enhance SROA and other transforms that want type-safe pointers.
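  //
  // Illustrative sketch (typed-pointer IR, value names invented):
  //
  //   %p2 = bitcast [4 x i32]* %p to i32*
  //
  // can become:
  //
  //   %p2 = getelementptr [4 x i32], [4 x i32]* %p, i32 0, i32 0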
  unsigned NumZeros = 0;
  while (SrcElTy && SrcElTy != DstElTy) {
    SrcElTy = GetElementPtrInst::getTypeAtIndex(SrcElTy, (uint64_t)0);
    ++NumZeros;
  }

  // If we found a path from the src to dest, create the getelementptr now.
  if (SrcElTy == DstElTy) {
    SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
    GetElementPtrInst *GEP = GetElementPtrInst::Create(
        SrcPTy->getNonOpaquePointerElementType(), Src, Idxs);

    // If the source pointer is dereferenceable, then assume it points to an
    // allocated object and apply "inbounds" to the GEP.
    bool CanBeNull, CanBeFreed;
    if (Src->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed)) {
      // In a non-default address space (not 0), a null pointer can not be
      // assumed inbounds, so ignore that case (dereferenceable_or_null).
      // The reason is that 'null' is not treated differently in these address
      // spaces, and we consequently ignore the 'gep inbounds' special case
      // for 'null' which allows 'inbounds' on 'null' if the indices are
      // zeros.
      if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
        GEP->setIsInBounds();
    }
    return GEP;
  }
  return nullptr;
}

Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
  // If the operands are integer typed then apply the integer transforms,
  // otherwise just apply the common ones.
  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType();
  Type *DestTy = CI.getType();

  // Get rid of casts from one type to the same type. These are useless and can
  // be replaced by the operand.
  if (DestTy == Src->getType())
    return replaceInstUsesWith(CI, Src);

  if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
    // If we are casting an alloca to a pointer to a type of the same
    // size, rewrite the allocation instruction to allocate the "right" type.
    // There is no need to modify malloc calls because it is their bitcast that
    // needs to be cleaned up.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
        return V;

    if (Instruction *I = convertBitCastToGEP(CI, Builder, DL))
      return I;
  }

  if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
    // Beware: messing with this target-specific oddity may cause trouble.
    if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
      Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
      return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
    }

    if (isa<IntegerType>(SrcTy)) {
      // If this is a cast from an integer to vector, check to see if the input
      // is a trunc or zext of a bitcast from vector. If so, we can replace all
      // the casts with a shuffle and (potentially) a bitcast.
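      //
      // Illustrative sketch (value names invented; on a little-endian target
      // the low half of the wide integer holds the first elements):
      //
      //   %bc = bitcast <4 x i32> %v to i128
      //   %tr = trunc i128 %bc to i64
      //   %r  = bitcast i64 %tr to <2 x i32>
      //
      // can roughly become a shufflevector of %v that keeps the first two
      // elements.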
      if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
        CastInst *SrcCast = cast<CastInst>(Src);
        if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
          if (isa<VectorType>(BCIn->getOperand(0)->getType()))
            if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
                    BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
              return I;
      }

      // If the input is an 'or' instruction, we may be doing shifts and ors to
      // assemble the elements of the vector manually. Try to rip the code out
      // and replace it with insertelements.
      if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
        return replaceInstUsesWith(CI, V);
    }
  }

  if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
    if (SrcVTy->getNumElements() == 1) {
      // If our destination is not a vector, then make this a straight
      // scalar-scalar cast.
      if (!DestTy->isVectorTy()) {
        Value *Elem =
          Builder.CreateExtractElement(Src,
                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
        return CastInst::Create(Instruction::BitCast, Elem, DestTy);
      }

      // Otherwise, see if our source is an insert. If so, then use the scalar
      // component directly:
      // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
      if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
        return new BitCastInst(InsElt->getOperand(1), DestTy);
    }

    // Convert an artificial vector insert into more analyzable bitwise logic.
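    //
    // Illustrative sketch (value names invented; little-endian, insert at
    // index 0):
    //
    //   %bc  = bitcast i32 %x to <4 x i8>
    //   %ins = insertelement <4 x i8> %bc, i8 %y, i64 0
    //   %r   = bitcast <4 x i8> %ins to i32
    //
    // can become:
    //
    //   %and = and i32 %x, -256
    //   %ext = zext i8 %y to i32
    //   %r   = or i32 %and, %ext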
    unsigned BitWidth = DestTy->getScalarSizeInBits();
    Value *X, *Y;
    uint64_t IndexC;
    if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
                                        m_Value(Y), m_ConstantInt(IndexC)))) &&
        DestTy->isIntegerTy() && X->getType() == DestTy &&
        Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
      // Adjust for big endian - the LSBs are at the high index.
      if (DL.isBigEndian())
        IndexC = SrcVTy->getNumElements() - 1 - IndexC;

      // We only handle (endian-normalized) insert to index 0. Any other insert
      // would require a left-shift, so that is an extra instruction.
      if (IndexC == 0) {
        // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
        unsigned EltWidth = Y->getType()->getScalarSizeInBits();
        APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
        Value *AndX = Builder.CreateAnd(X, MaskC);
        Value *ZextY = Builder.CreateZExt(Y, DestTy);
        return BinaryOperator::CreateOr(AndX, ZextY);
      }
    }
  }

  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
    // Okay, we have (bitcast (shuffle ..)). Check to see if this is
    // a bitcast to a vector with the same # elts.
    Value *ShufOp0 = Shuf->getOperand(0);
    Value *ShufOp1 = Shuf->getOperand(1);
    auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
    auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
    if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
        cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
        ShufElts == SrcVecElts) {
      BitCastInst *Tmp;
      // If either of the operands is a cast from CI.getType(), then
      // evaluating the shuffle in the casted destination's type will allow
      // us to eliminate at least one cast.
      if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
           Tmp->getOperand(0)->getType() == DestTy) ||
          ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
           Tmp->getOperand(0)->getType() == DestTy)) {
        Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
        Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
        // Return a new shuffle vector. Use the same element IDs, as we
        // know the vector types match #elts.
        return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
      }
    }

    // A bitcasted-to-scalar and byte/bit reversing shuffle is better
    // recognized as a byte/bit swap:
    //   bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
    //   bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
    if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
        Shuf->hasOneUse() && Shuf->isReverse()) {
      unsigned IntrinsicNum = 0;
      if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
          SrcTy->getScalarSizeInBits() == 8) {
        IntrinsicNum = Intrinsic::bswap;
      } else if (SrcTy->getScalarSizeInBits() == 1) {
        IntrinsicNum = Intrinsic::bitreverse;
      }
      if (IntrinsicNum != 0) {
        assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
        assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
        Function *BswapOrBitreverse =
            Intrinsic::getDeclaration(CI.getModule(), IntrinsicNum, DestTy);
        Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
        return CallInst::Create(BswapOrBitreverse, {ScalarX});
      }
    }
  }

  // Handle the A->B->A cast when there is an intervening PHI node.
  if (PHINode *PN = dyn_cast<PHINode>(Src))
    if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
      return I;

  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
    return I;

  if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
    return I;

  if (Instruction *I = foldBitCastSelect(CI, Builder))
    return I;

  if (SrcTy->isPointerTy())
    return commonPointerCastTransforms(CI);
  return commonCastTransforms(CI);
}

Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
  // If the destination pointer element type is not the same as the source's,
  // first do a bitcast to the destination type, and then the addrspacecast.
  // This allows the cast to be exposed to other transforms.
  Value *Src = CI.getOperand(0);
  PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
  PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
  if (!SrcTy->hasSameElementTypeAs(DestTy)) {
    Type *MidTy =
        PointerType::getWithSamePointeeType(DestTy, SrcTy->getAddressSpace());
    // Handle vectors of pointers.
    if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
      MidTy = VectorType::get(MidTy, VT->getElementCount());

    Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
    return new AddrSpaceCastInst(NewBitCast, CI.getType());
  }

  return commonPointerCastTransforms(CI);
}