//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <numeric>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Analyze 'Val', seeing if it is a simple linear expression.
/// If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
///
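/// For example, an array-size expression of the form ((X << 2) + 12)
/// decomposes into X with Scale = 4 and Offset = 12.
///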
static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
                                        uint64_t &Offset) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    Offset = CI->getZExtValue();
    Scale = 0;
    return ConstantInt::get(Val->getType(), 0);
  }

  if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
    // Cannot look past anything that might overflow.
    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
    if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) {
      Scale = 1;
      Offset = 0;
      return Val;
    }

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
      if (I->getOpcode() == Instruction::Shl) {
        // This is a value scaled by '1 << the shift amt'.
        Scale = UINT64_C(1) << RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Mul) {
        // This value is scaled by 'RHS'.
        Scale = RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Add) {
        // We have X+C.  Check to see if we really have (X*C2)+C1,
        // where C1 is divisible by C2.
        unsigned SubScale;
        Value *SubVal =
          decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
        Offset += RHS->getZExtValue();
        Scale = SubScale;
        return SubVal;
      }
    }
  }

  // Otherwise, we can't look past this.
  Scale = 1;
  Offset = 0;
  return Val;
}

/// If we find a cast of an allocation instruction, try to eliminate the cast by
/// moving the type information into the alloc.
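///
/// For example (conceptually), given
///   %buf = alloca i8, i32 %bytes        ; where %bytes = shl i32 %n, 2
///   %p   = bitcast i8* %buf to i32*
/// the alloca can be rewritten as "alloca i32, i32 %n" and the bitcast
/// removed, provided the alignment and use constraints checked below hold.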
Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
                                                        AllocaInst &AI) {
  PointerType *PTy = cast<PointerType>(CI.getType());
  // Opaque pointers don't have an element type we could replace with.
  if (PTy->isOpaque())
    return nullptr;

  IRBuilderBase::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(&AI);

  // Get the type really allocated and the type casted to.
  Type *AllocElTy = AI.getAllocatedType();
  Type *CastElTy = PTy->getNonOpaquePointerElementType();
  if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;

  // This optimisation does not work for cases where the cast type
  // is scalable and the allocated type is not. This is because we need to
  // know how many times the casted type fits into the allocated type.
  // For the opposite case where the allocated type is scalable and the
  // cast type is not, this leads to poor code quality due to the
  // introduction of 'vscale' into the calculations. It seems better to
  // bail out for this case too until we've done a proper cost-benefit
  // analysis.
  bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
  bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
  if (AllocIsScalable != CastIsScalable) return nullptr;

  Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
  Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
  if (CastElTyAlign < AllocElTyAlign) return nullptr;

  // If the allocation has multiple uses, only promote it if we are strictly
  // increasing the alignment of the resultant allocation.  If we keep it the
  // same, we open the door to infinite loops of various kinds.
  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;

  // The alloc and cast types should be either both fixed or both scalable.
  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinSize();
  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinSize();
  if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;

  // If the allocation has multiple uses, only promote it if we're not
  // shrinking the amount of memory being allocated.
  uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy).getKnownMinSize();
  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinSize();
  if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;

  // See if we can satisfy the modulus by pulling a scale out of the array
  // size argument.
  unsigned ArraySizeScale;
  uint64_t ArrayOffset;
  Value *NumElements = // See if the array size is a decomposable linear expr.
    decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);

  // If we can now satisfy the modulus, by using a non-1 scale, we really can
  // do the xform.
  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
      (AllocElTySize*ArrayOffset   ) % CastElTySize != 0) return nullptr;

  // We don't currently support arrays of scalable types.
  assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));

  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
  Value *Amt = nullptr;
  if (Scale == 1) {
    Amt = NumElements;
  } else {
    Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
    // Insert before the alloca, not before the cast.
    Amt = Builder.CreateMul(Amt, NumElements);
  }

  if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
    Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
                                  Offset, true);
    Amt = Builder.CreateAdd(Amt, Off);
  }

  AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
  New->setAlignment(AI.getAlign());
  New->takeName(&AI);
  New->setUsedWithInAlloca(AI.isUsedWithInAlloca());

  // If the allocation has multiple real uses, insert a cast and change all
  // things that used it to use the new cast.  This will also hack on CI, but it
  // will die soon.
  if (!AI.hasOneUse()) {
    // New is the allocation instruction, pointer typed. AI is the original
    // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
    Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
    replaceInstUsesWith(AI, NewCast);
    eraseInstFromFunction(AI);
  }
  return replaceInstUsesWith(CI, New);
}

/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
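///
/// For example, evaluating "add i64 (zext i32 %x to i64), 5" in type i32
/// yields "add i32 %x, 5": the zext collapses to %x and the constant is
/// recreated in the narrower type.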
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V)) {
    C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
    // If we got a constantexpr back, try to simplify it with DL info.
    return ConstantFoldConstant(C, DL, &TLI);
  }

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source.  There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
        EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, *I);
}

Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}

/// Implement the transforms common to all CastInst visitors.
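///
/// For example, a cast-of-cast pair such as
///   %t = zext i8 %x to i16
///   %r = zext i16 %t to i32
/// collapses to a single "zext i8 %x to i32" when the pair is eliminable.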
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {
      if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
        replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
        return NV;
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, Mask);
    }
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return true;
  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;
  return false;
}

/// Filter out values that we can not evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  assert(!isa<Constant>(V) && "Constant should already be handled.");
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V.  We should return true if trunc(V)
/// can be computed by computing V in the smaller type.  If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
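/// For example, given
///   %e = zext i8 %a to i32
///   %m = and i32 %e, 15
///   %t = trunc i32 %m to i8
/// the 'and' can be evaluated directly in i8 (as "and i8 %a, 15"), so this
/// returns true for %m with Ty == i8.
///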
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // in-range amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    // zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    // similar to sign bit of the truncate type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
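///
/// For example (conceptually), an i32 rotate that was promoted to i64
/// arithmetic,
///   trunc (or (shl %x, %amt), (lshr %x, (sub 32, %amt))) to i32
/// where the high 32 bits of %x are known zero, becomes
///   call i32 @llvm.fshl.i32(i32 trunc(%x), i32 trunc(%x), i32 trunc(%amt))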
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount can not over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // We have an unnecessarily wide rotate!
  // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
  // Narrow the inputs and convert to funnel shift intrinsic:
  // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
  Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
/// TODO: Transforms for truncated shifts should be moved into here.
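///
/// For example, "trunc i64 (add i64 %x, 42) to i32" becomes
/// "add i32 (trunc i64 %x to i32), 42" when the add has no other uses.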
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }
  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      is_splat(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
    // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc   (inselt undef, X, Index) --> inselt undef,   (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type.  Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);
    if (DestTy->isIntegerTy()) {
      // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
      // TODO: We canonicalize to more instructions here because we are probably
      // lacking equivalent analysis for trunc relative to icmp. There may also
      // be codegen concerns. If those trunc limitations were removed, we could
      // remove this transform.
      Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    // For vectors, we do not canonicalize all truncs to icmp, so optimize
    // patterns that would be covered within visitICmpInst.
    Value *X;
    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_Constant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      MaskC = ConstantExpr::getOr(MaskC, One);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
  }

  Value *A, *B;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false);
        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false);
        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
  if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) {
    unsigned MaxShiftAmt = SrcWidth - DestWidth;

    // If the shift is small enough, all zero/sign bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      auto *OldShift = cast<Instruction>(Src);
      bool IsExact = OldShift->isExact();
      auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
      ShAmt = Constant::mergeUndefsWith(ShAmt, C);
      Value *Shift =
          OldShift->getOpcode() == Instruction::AShr
              ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
              : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
      return CastInst::CreateTruncOrBitCast(Shift, DestTy);
    }
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shift by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend in reg pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // A badly fit destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  // trunc (ctlz_i32(zext(A), B)) --> add(ctlz_i16(A, B), C)
  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
                                                       m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale(DL))) {
    if (Trunc.getFunction() &&
        Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(MaxVScale.getValue()) < DestWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Trunc, VScale);
        }
      }
    }
  }

  return nullptr;
}

  882. Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext) {
  883. // If we are just checking for a icmp eq of a single bit and zext'ing it
  884. // to an integer, then shift the bit to the appropriate place and then
  885. // cast to integer to avoid the comparison.
  886. const APInt *Op1CV;
  887. if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
  888. // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
  889. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
  890. if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) ||
  891. (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnes())) {
  892. Value *In = Cmp->getOperand(0);
  893. Value *Sh = ConstantInt::get(In->getType(),
  894. In->getType()->getScalarSizeInBits() - 1);
  895. In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
  896. if (In->getType() != Zext.getType())
  897. In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
  898. if (Cmp->getPredicate() == ICmpInst::ICMP_SGT) {
  899. Constant *One = ConstantInt::get(In->getType(), 1);
  900. In = Builder.CreateXor(In, One, In->getName() + ".not");
  901. }
  902. return replaceInstUsesWith(Zext, In);
  903. }
  904. // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
  905. // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
  906. // zext (X == 1) to i32 --> X iff X has only the low bit set.
  907. // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
  908. // zext (X != 0) to i32 --> X iff X has only the low bit set.
  909. // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
  910. // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
  911. // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
  912. if ((Op1CV->isZero() || Op1CV->isPowerOf2()) &&
  913. // This only works for EQ and NE
  914. Cmp->isEquality()) {
  915. // If Op1C some other power of two, convert:
  916. KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
  917. APInt KnownZeroMask(~Known.Zero);
  918. if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
  919. bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
  920. if (!Op1CV->isZero() && (*Op1CV != KnownZeroMask)) {
  921. // (X&4) == 2 --> false
  922. // (X&4) != 2 --> true
  923. Constant *Res = ConstantInt::get(Zext.getType(), isNE);
  924. return replaceInstUsesWith(Zext, Res);
  925. }
  926. uint32_t ShAmt = KnownZeroMask.logBase2();
  927. Value *In = Cmp->getOperand(0);
  928. if (ShAmt) {
  929. // Perform a logical shr by shiftamt.
  930. // Insert the shift to put the result in the low bit.
  931. In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
  932. In->getName() + ".lobit");
  933. }
  934. if (!Op1CV->isZero() == isNE) { // Toggle the low bit.
  935. Constant *One = ConstantInt::get(In->getType(), 1);
  936. In = Builder.CreateXor(In, One);
  937. }
  938. if (Zext.getType() == In->getType())
  939. return replaceInstUsesWith(Zext, In);
  940. Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
  941. return replaceInstUsesWith(Zext, IntCast);
  942. }
  943. }
  944. }
  945. if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
  946. // Test if a bit is clear/set using a shifted-one mask:
  947. // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
  948. // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
  949. Value *X, *ShAmt;
  950. if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
  951. match(Cmp->getOperand(0),
  952. m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
  953. if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
  954. X = Builder.CreateNot(X);
  955. Value *Lshr = Builder.CreateLShr(X, ShAmt);
  956. Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
  957. return replaceInstUsesWith(Zext, And1);
  958. }
959. // icmp ne A, B reduces to xor A, B (shifted down to the low bit) when A and B
960. // can differ in only a single bit. It is also profitable to transform icmp eq
961. // into not(xor(A, B)) because that may lead to additional simplifications.
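// Illustrative sketch (not from the original source): if %a and %b are i8
// values known to agree on every bit except bit 3, then
//   zext (icmp ne i8 %a, %b) to i8
// can be rewritten without the compare as
//   %x = xor i8 %a, %b          ; nonzero only in bit 3
//   %r = lshr i8 %x, 3          ; move the differing bit to the lsb
// and the eq form additionally xors %r with 1 to invert the low bit.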
  962. if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) {
  963. Value *LHS = Cmp->getOperand(0);
  964. Value *RHS = Cmp->getOperand(1);
  965. KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
  966. KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);
  967. if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
  968. APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
  969. APInt UnknownBit = ~KnownBits;
  970. if (UnknownBit.countPopulation() == 1) {
  971. Value *Result = Builder.CreateXor(LHS, RHS);
  972. // Mask off any bits that are set and won't be shifted away.
  973. if (KnownLHS.One.uge(UnknownBit))
  974. Result = Builder.CreateAnd(Result,
  975. ConstantInt::get(ITy, UnknownBit));
  976. // Shift the bit we're testing down to the lsb.
  977. Result = Builder.CreateLShr(
  978. Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
  979. if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
  980. Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1));
  981. Result->takeName(Cmp);
  982. return replaceInstUsesWith(Zext, Result);
  983. }
  984. }
  985. }
  986. }
  987. return nullptr;
  988. }
  989. /// Determine if the specified value can be computed in the specified wider type
  990. /// and produce the same low bits. If not, return false.
  991. ///
  992. /// If this function returns true, it can also return a non-zero number of bits
  993. /// (in BitsToClear) which indicates that the value it computes is correct for
  994. /// the zero extend, but that the additional BitsToClear bits need to be zero'd
  995. /// out. For example, to promote something like:
  996. ///
  997. /// %B = trunc i64 %A to i32
  998. /// %C = lshr i32 %B, 8
  999. /// %E = zext i32 %C to i64
  1000. ///
  1001. /// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
  1002. /// set to 8 to indicate that the promoted value needs to have bits 24-31
  1003. /// cleared in addition to bits 32-63. Since an 'and' will be generated to
  1004. /// clear the top bits anyway, doing this has no extra cost.
  1005. ///
  1006. /// This function works on both vectors and scalars.
  1007. static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
  1008. InstCombinerImpl &IC, Instruction *CxtI) {
  1009. BitsToClear = 0;
  1010. if (canAlwaysEvaluateInType(V, Ty))
  1011. return true;
  1012. if (canNotEvaluateInType(V, Ty))
  1013. return false;
  1014. auto *I = cast<Instruction>(V);
  1015. unsigned Tmp;
  1016. switch (I->getOpcode()) {
  1017. case Instruction::ZExt: // zext(zext(x)) -> zext(x).
  1018. case Instruction::SExt: // zext(sext(x)) -> sext(x).
  1019. case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
  1020. return true;
  1021. case Instruction::And:
  1022. case Instruction::Or:
  1023. case Instruction::Xor:
  1024. case Instruction::Add:
  1025. case Instruction::Sub:
  1026. case Instruction::Mul:
  1027. if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
  1028. !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
  1029. return false;
  1030. // These can all be promoted if neither operand has 'bits to clear'.
  1031. if (BitsToClear == 0 && Tmp == 0)
  1032. return true;
  1033. // If the operation is an AND/OR/XOR and the bits to clear are zero in the
  1034. // other side, BitsToClear is ok.
  1035. if (Tmp == 0 && I->isBitwiseLogicOp()) {
  1036. // We use MaskedValueIsZero here for generality, but the case we care
  1037. // about the most is constant RHS.
  1038. unsigned VSize = V->getType()->getScalarSizeInBits();
  1039. if (IC.MaskedValueIsZero(I->getOperand(1),
  1040. APInt::getHighBitsSet(VSize, BitsToClear),
  1041. 0, CxtI)) {
  1042. // If this is an And instruction and all of the BitsToClear are
  1043. // known to be zero we can reset BitsToClear.
  1044. if (I->getOpcode() == Instruction::And)
  1045. BitsToClear = 0;
  1046. return true;
  1047. }
  1048. }
  1049. // Otherwise, we don't know how to analyze this BitsToClear case yet.
  1050. return false;
  1051. case Instruction::Shl: {
  1052. // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
  1053. // upper bits we can reduce BitsToClear by the shift amount.
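// Illustrative sketch (not part of the original comments): when promoting
//   %s = lshr i32 %x, 8        ; leaves BitsToClear = 8 (bits 24..31 are dirty)
//   %t = shl i32 %s, 3
// to i64, the shl moves the dirty region up by 3, so only 5 dirty bits remain
// inside the low 32 bits (BitsToClear becomes 8 - 3 = 5).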
  1054. const APInt *Amt;
  1055. if (match(I->getOperand(1), m_APInt(Amt))) {
  1056. if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
  1057. return false;
  1058. uint64_t ShiftAmt = Amt->getZExtValue();
  1059. BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
  1060. return true;
  1061. }
  1062. return false;
  1063. }
  1064. case Instruction::LShr: {
  1065. // We can promote lshr(x, cst) if we can promote x. This requires the
  1066. // ultimate 'and' to clear out the high zero bits we're clearing out though.
  1067. const APInt *Amt;
  1068. if (match(I->getOperand(1), m_APInt(Amt))) {
  1069. if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
  1070. return false;
  1071. BitsToClear += Amt->getZExtValue();
  1072. if (BitsToClear > V->getType()->getScalarSizeInBits())
  1073. BitsToClear = V->getType()->getScalarSizeInBits();
  1074. return true;
  1075. }
  1076. // Cannot promote variable LSHR.
  1077. return false;
  1078. }
  1079. case Instruction::Select:
  1080. if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
  1081. !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
  1082. // TODO: If important, we could handle the case when the BitsToClear are
  1083. // known zero in the disagreeing side.
  1084. Tmp != BitsToClear)
  1085. return false;
  1086. return true;
  1087. case Instruction::PHI: {
  1088. // We can change a phi if we can change all operands. Note that we never
  1089. // get into trouble with cyclic PHIs here because we only consider
  1090. // instructions with a single use.
  1091. PHINode *PN = cast<PHINode>(I);
  1092. if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
  1093. return false;
  1094. for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
  1095. if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
  1096. // TODO: If important, we could handle the case when the BitsToClear
  1097. // are known zero in the disagreeing input.
  1098. Tmp != BitsToClear)
  1099. return false;
  1100. return true;
  1101. }
  1102. default:
  1103. // TODO: Can handle more cases here.
  1104. return false;
  1105. }
  1106. }
  1107. Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
  1108. // If this zero extend is only used by a truncate, let the truncate be
  1109. // eliminated before we try to optimize this zext.
  1110. if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
  1111. return nullptr;
1112. // If one of the common conversions will work, do it.
  1113. if (Instruction *Result = commonCastTransforms(CI))
  1114. return Result;
  1115. Value *Src = CI.getOperand(0);
  1116. Type *SrcTy = Src->getType(), *DestTy = CI.getType();
  1117. // Try to extend the entire expression tree to the wide destination type.
  1118. unsigned BitsToClear;
  1119. if (shouldChangeType(SrcTy, DestTy) &&
  1120. canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
  1121. assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
  1122. "Can't clear more bits than in SrcTy");
  1123. // Okay, we can transform this! Insert the new expression now.
  1124. LLVM_DEBUG(
  1125. dbgs() << "ICE: EvaluateInDifferentType converting expression type"
  1126. " to avoid zero extend: "
  1127. << CI << '\n');
  1128. Value *Res = EvaluateInDifferentType(Src, DestTy, false);
  1129. assert(Res->getType() == DestTy);
  1130. // Preserve debug values referring to Src if the zext is its last use.
  1131. if (auto *SrcOp = dyn_cast<Instruction>(Src))
  1132. if (SrcOp->hasOneUse())
  1133. replaceAllDbgUsesWith(*SrcOp, *Res, CI, DT);
  1134. uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
  1135. uint32_t DestBitSize = DestTy->getScalarSizeInBits();
  1136. // If the high bits are already filled with zeros, just replace this
  1137. // cast with the result.
  1138. if (MaskedValueIsZero(Res,
  1139. APInt::getHighBitsSet(DestBitSize,
  1140. DestBitSize-SrcBitsKept),
  1141. 0, &CI))
  1142. return replaceInstUsesWith(CI, Res);
  1143. // We need to emit an AND to clear the high bits.
  1144. Constant *C = ConstantInt::get(Res->getType(),
  1145. APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
  1146. return BinaryOperator::CreateAnd(Res, C);
  1147. }
  1148. // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  1149. // types and if the sizes are just right we can convert this into a logical
  1150. // 'and' which will be much cheaper than the pair of casts.
  1151. if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
  1152. // TODO: Subsume this into EvaluateInDifferentType.
  1153. // Get the sizes of the types involved. We know that the intermediate type
  1154. // will be smaller than A or C, but don't know the relation between A and C.
  1155. Value *A = CSrc->getOperand(0);
  1156. unsigned SrcSize = A->getType()->getScalarSizeInBits();
  1157. unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
  1158. unsigned DstSize = CI.getType()->getScalarSizeInBits();
  1159. // If we're actually extending zero bits, then if
  1160. // SrcSize < DstSize: zext(a & mask)
  1161. // SrcSize == DstSize: a & mask
  1162. // SrcSize > DstSize: trunc(a) & mask
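// Illustrative sketch (not from the original comments), for the first case:
//   %m = trunc i16 %a to i8
//   %z = zext i8 %m to i32
// has SrcSize = 16 < DstSize = 32 and MidSize = 8, so it becomes
//   %mask = and i16 %a, 255
//   %z    = zext i16 %mask to i32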
  1163. if (SrcSize < DstSize) {
  1164. APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
  1165. Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
  1166. Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
  1167. return new ZExtInst(And, CI.getType());
  1168. }
  1169. if (SrcSize == DstSize) {
  1170. APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
  1171. return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
  1172. AndValue));
  1173. }
  1174. if (SrcSize > DstSize) {
  1175. Value *Trunc = Builder.CreateTrunc(A, CI.getType());
  1176. APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
  1177. return BinaryOperator::CreateAnd(Trunc,
  1178. ConstantInt::get(Trunc->getType(),
  1179. AndValue));
  1180. }
  1181. }
  1182. if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
  1183. return transformZExtICmp(Cmp, CI);
  1184. // zext(trunc(X) & C) -> (X & zext(C)).
  1185. Constant *C;
  1186. Value *X;
  1187. if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
  1188. X->getType() == CI.getType())
  1189. return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));
  1190. // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  1191. Value *And;
  1192. if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
  1193. match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
  1194. X->getType() == CI.getType()) {
  1195. Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
  1196. return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  1197. }
  1198. if (match(Src, m_VScale(DL))) {
  1199. if (CI.getFunction() &&
  1200. CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
  1201. Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
  1202. if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
  1203. unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
  1204. if (Log2_32(MaxVScale.getValue()) < TypeWidth) {
  1205. Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
  1206. return replaceInstUsesWith(CI, VScale);
  1207. }
  1208. }
  1209. }
  1210. }
  1211. return nullptr;
  1212. }
  1213. /// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
  1214. Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *ICI,
  1215. Instruction &CI) {
  1216. Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
  1217. ICmpInst::Predicate Pred = ICI->getPredicate();
  1218. // Don't bother if Op1 isn't of vector or integer type.
  1219. if (!Op1->getType()->isIntOrIntVectorTy())
  1220. return nullptr;
  1221. if ((Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) ||
  1222. (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))) {
  1223. // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
  1224. // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
  1225. Value *Sh = ConstantInt::get(Op0->getType(),
  1226. Op0->getType()->getScalarSizeInBits() - 1);
  1227. Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
  1228. if (In->getType() != CI.getType())
  1229. In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/);
  1230. if (Pred == ICmpInst::ICMP_SGT)
  1231. In = Builder.CreateNot(In, In->getName() + ".not");
  1232. return replaceInstUsesWith(CI, In);
  1233. }
  1234. if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
  1235. // If we know that only one bit of the LHS of the icmp can be set and we
  1236. // have an equality comparison with zero or a power of 2, we can transform
  1237. // the icmp and sext into bitwise/integer operations.
  1238. if (ICI->hasOneUse() &&
  1239. ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
  1240. KnownBits Known = computeKnownBits(Op0, 0, &CI);
  1241. APInt KnownZeroMask(~Known.Zero);
  1242. if (KnownZeroMask.isPowerOf2()) {
  1243. Value *In = ICI->getOperand(0);
  1244. // If the icmp tests for a known zero bit we can constant fold it.
  1245. if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
  1246. Value *V = Pred == ICmpInst::ICMP_NE ?
  1247. ConstantInt::getAllOnesValue(CI.getType()) :
  1248. ConstantInt::getNullValue(CI.getType());
  1249. return replaceInstUsesWith(CI, V);
  1250. }
  1251. if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
  1252. // sext ((x & 2^n) == 0) -> (x >> n) - 1
  1253. // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
  1254. unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
  1255. // Perform a right shift to place the desired bit in the LSB.
  1256. if (ShiftAmt)
  1257. In = Builder.CreateLShr(In,
  1258. ConstantInt::get(In->getType(), ShiftAmt));
  1259. // At this point "In" is either 1 or 0. Subtract 1 to turn
  1260. // {1, 0} -> {0, -1}.
  1261. In = Builder.CreateAdd(In,
  1262. ConstantInt::getAllOnesValue(In->getType()),
  1263. "sext");
  1264. } else {
  1265. // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
  1266. // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
  1267. unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
  1268. // Perform a left shift to place the desired bit in the MSB.
  1269. if (ShiftAmt)
  1270. In = Builder.CreateShl(In,
  1271. ConstantInt::get(In->getType(), ShiftAmt));
  1272. // Distribute the bit over the whole bit width.
  1273. In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
  1274. KnownZeroMask.getBitWidth() - 1), "sext");
  1275. }
  1276. if (CI.getType() == In->getType())
  1277. return replaceInstUsesWith(CI, In);
  1278. return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
  1279. }
  1280. }
  1281. }
  1282. return nullptr;
  1283. }
  1284. /// Return true if we can take the specified value and return it as type Ty
  1285. /// without inserting any new casts and without changing the value of the common
  1286. /// low bits. This is used by code that tries to promote integer operations to
1287. /// a wider type, which will allow us to eliminate the extension.
  1288. ///
  1289. /// This function works on both vectors and scalars.
  1290. ///
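/// Illustrative sketch (not part of the original comment): for
///   %a = add i8 %x, %y
///   %s = sext i8 %a to i32
/// the add can be rebuilt directly in i32 on sign-extended operands; its low
/// 8 bits match the original, and visitSExt re-extends with shl/ashr only if
/// the high bits are not already correct.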
  1291. static bool canEvaluateSExtd(Value *V, Type *Ty) {
  1292. assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
  1293. "Can't sign extend type to a smaller type");
  1294. if (canAlwaysEvaluateInType(V, Ty))
  1295. return true;
  1296. if (canNotEvaluateInType(V, Ty))
  1297. return false;
  1298. auto *I = cast<Instruction>(V);
  1299. switch (I->getOpcode()) {
  1300. case Instruction::SExt: // sext(sext(x)) -> sext(x)
  1301. case Instruction::ZExt: // sext(zext(x)) -> zext(x)
  1302. case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
  1303. return true;
  1304. case Instruction::And:
  1305. case Instruction::Or:
  1306. case Instruction::Xor:
  1307. case Instruction::Add:
  1308. case Instruction::Sub:
  1309. case Instruction::Mul:
  1310. // These operators can all arbitrarily be extended if their inputs can.
  1311. return canEvaluateSExtd(I->getOperand(0), Ty) &&
  1312. canEvaluateSExtd(I->getOperand(1), Ty);
  1313. //case Instruction::Shl: TODO
  1314. //case Instruction::LShr: TODO
  1315. case Instruction::Select:
  1316. return canEvaluateSExtd(I->getOperand(1), Ty) &&
  1317. canEvaluateSExtd(I->getOperand(2), Ty);
  1318. case Instruction::PHI: {
  1319. // We can change a phi if we can change all operands. Note that we never
  1320. // get into trouble with cyclic PHIs here because we only consider
  1321. // instructions with a single use.
  1322. PHINode *PN = cast<PHINode>(I);
  1323. for (Value *IncValue : PN->incoming_values())
  1324. if (!canEvaluateSExtd(IncValue, Ty)) return false;
  1325. return true;
  1326. }
  1327. default:
  1328. // TODO: Can handle more cases here.
  1329. break;
  1330. }
  1331. return false;
  1332. }
  1333. Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
  1334. // If this sign extend is only used by a truncate, let the truncate be
  1335. // eliminated before we try to optimize this sext.
  1336. if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
  1337. return nullptr;
  1338. if (Instruction *I = commonCastTransforms(CI))
  1339. return I;
  1340. Value *Src = CI.getOperand(0);
  1341. Type *SrcTy = Src->getType(), *DestTy = CI.getType();
  1342. unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
  1343. unsigned DestBitSize = DestTy->getScalarSizeInBits();
  1344. // If we know that the value being extended is positive, we can use a zext
  1345. // instead.
  1346. KnownBits Known = computeKnownBits(Src, 0, &CI);
  1347. if (Known.isNonNegative())
  1348. return CastInst::Create(Instruction::ZExt, Src, DestTy);
  1349. // Try to extend the entire expression tree to the wide destination type.
  1350. if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
  1351. // Okay, we can transform this! Insert the new expression now.
  1352. LLVM_DEBUG(
  1353. dbgs() << "ICE: EvaluateInDifferentType converting expression type"
  1354. " to avoid sign extend: "
  1355. << CI << '\n');
  1356. Value *Res = EvaluateInDifferentType(Src, DestTy, true);
  1357. assert(Res->getType() == DestTy);
  1358. // If the high bits are already filled with sign bit, just replace this
  1359. // cast with the result.
  1360. if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize)
  1361. return replaceInstUsesWith(CI, Res);
  1362. // We need to emit a shl + ashr to do the sign extend.
  1363. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
  1364. return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
  1365. ShAmt);
  1366. }
  1367. Value *X;
  1368. if (match(Src, m_Trunc(m_Value(X)))) {
  1369. // If the input has more sign bits than bits truncated, then convert
  1370. // directly to final type.
  1371. unsigned XBitSize = X->getType()->getScalarSizeInBits();
  1372. if (ComputeNumSignBits(X, 0, &CI) > XBitSize - SrcBitSize)
  1373. return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
  1374. // If input is a trunc from the destination type, then convert into shifts.
  1375. if (Src->hasOneUse() && X->getType() == DestTy) {
  1376. // sext (trunc X) --> ashr (shl X, C), C
  1377. Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
  1378. return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
  1379. }
  1380. // If we are replacing shifted-in high zero bits with sign bits, convert
  1381. // the logic shift to arithmetic shift and eliminate the cast to
  1382. // intermediate type:
  1383. // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
  1384. Value *Y;
  1385. if (Src->hasOneUse() &&
  1386. match(X, m_LShr(m_Value(Y),
  1387. m_SpecificIntAllowUndef(XBitSize - SrcBitSize)))) {
  1388. Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
  1389. return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
  1390. }
  1391. }
  1392. if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
  1393. return transformSExtICmp(ICI, CI);
1394. // If the input is a shl/ashr pair of the same constant, then this is a sign
1395. // extension from a smaller value. If we could trust arbitrary bitwidth
1396. // integers, we could turn this into a truncate to the smaller bitwidth and then
  1397. // use a sext for the whole extension. Since we don't, look deeper and check
  1398. // for a truncate. If the source and dest are the same type, eliminate the
  1399. // trunc and extend and just do shifts. For example, turn:
  1400. // %a = trunc i32 %i to i8
  1401. // %b = shl i8 %a, C
  1402. // %c = ashr i8 %b, C
  1403. // %d = sext i8 %c to i32
  1404. // into:
  1405. // %a = shl i32 %i, 32-(8-C)
  1406. // %d = ashr i32 %a, 32-(8-C)
  1407. Value *A = nullptr;
  1408. // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
  1409. Constant *BA = nullptr, *CA = nullptr;
  1410. if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
  1411. m_Constant(CA))) &&
  1412. BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
  1413. Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy);
  1414. Constant *NumLowbitsLeft = ConstantExpr::getSub(
  1415. ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
  1416. Constant *NewShAmt = ConstantExpr::getSub(
  1417. ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
  1418. NumLowbitsLeft);
  1419. NewShAmt =
  1420. Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
  1421. A = Builder.CreateShl(A, NewShAmt, CI.getName());
  1422. return BinaryOperator::CreateAShr(A, NewShAmt);
  1423. }
  1424. // Splatting a bit of constant-index across a value:
  1425. // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
  1426. // TODO: If the dest type is different, use a cast (adjust use check).
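// Illustrative instance (not from the original comments), with N = 32, M = 8:
//   sext (ashr (trunc i32 %X to i8), 7) to i32
//     --> ashr (shl i32 %X, 24), 31
// shl 24 moves bit 7 of %X into the sign position, and the ashr splats it.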
  1427. if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
  1428. m_SpecificInt(SrcBitSize - 1)))) &&
  1429. X->getType() == DestTy) {
  1430. Constant *ShlAmtC = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
  1431. Constant *AshrAmtC = ConstantInt::get(DestTy, DestBitSize - 1);
  1432. Value *Shl = Builder.CreateShl(X, ShlAmtC);
  1433. return BinaryOperator::CreateAShr(Shl, AshrAmtC);
  1434. }
  1435. if (match(Src, m_VScale(DL))) {
  1436. if (CI.getFunction() &&
  1437. CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
  1438. Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
  1439. if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
  1440. if (Log2_32(MaxVScale.getValue()) < (SrcBitSize - 1)) {
  1441. Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
  1442. return replaceInstUsesWith(CI, VScale);
  1443. }
  1444. }
  1445. }
  1446. }
  1447. return nullptr;
  1448. }
1449. /// Return true if the specified floating-point constant fits in the specified
1450. /// FP type without changing its value.
  1451. static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
  1452. bool losesInfo;
  1453. APFloat F = CFP->getValueAPF();
  1454. (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
  1455. return !losesInfo;
  1456. }
  1457. static Type *shrinkFPConstant(ConstantFP *CFP) {
  1458. if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
  1459. return nullptr; // No constant folding of this.
  1460. // See if the value can be truncated to half and then reextended.
  1461. if (fitsInFPType(CFP, APFloat::IEEEhalf()))
  1462. return Type::getHalfTy(CFP->getContext());
  1463. // See if the value can be truncated to float and then reextended.
  1464. if (fitsInFPType(CFP, APFloat::IEEEsingle()))
  1465. return Type::getFloatTy(CFP->getContext());
  1466. if (CFP->getType()->isDoubleTy())
  1467. return nullptr; // Won't shrink.
  1468. if (fitsInFPType(CFP, APFloat::IEEEdouble()))
  1469. return Type::getDoubleTy(CFP->getContext());
  1470. // Don't try to shrink to various long double types.
  1471. return nullptr;
  1472. }
  1473. // Determine if this is a vector of ConstantFPs and if so, return the minimal
  1474. // type we can safely truncate all elements to.
  1475. // TODO: Make these support undef elements.
  1476. static Type *shrinkFPConstantVector(Value *V) {
  1477. auto *CV = dyn_cast<Constant>(V);
  1478. auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
  1479. if (!CV || !CVVTy)
  1480. return nullptr;
  1481. Type *MinType = nullptr;
  1482. unsigned NumElts = CVVTy->getNumElements();
  1483. // For fixed-width vectors we find the minimal type by looking
  1484. // through the constant values of the vector.
  1485. for (unsigned i = 0; i != NumElts; ++i) {
  1486. auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
  1487. if (!CFP)
  1488. return nullptr;
  1489. Type *T = shrinkFPConstant(CFP);
  1490. if (!T)
  1491. return nullptr;
  1492. // If we haven't found a type yet or this type has a larger mantissa than
  1493. // our previous type, this is our new minimal type.
  1494. if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
  1495. MinType = T;
  1496. }
  1497. // Make a vector type from the minimal type.
  1498. return FixedVectorType::get(MinType, NumElts);
  1499. }
  1500. /// Find the minimum FP type we can safely truncate to.
  1501. static Type *getMinimumFPType(Value *V) {
  1502. if (auto *FPExt = dyn_cast<FPExtInst>(V))
  1503. return FPExt->getOperand(0)->getType();
  1504. // If this value is a constant, return the constant in the smallest FP type
  1505. // that can accurately represent it. This allows us to turn
  1506. // (float)((double)X+2.0) into x+2.0f.
  1507. if (auto *CFP = dyn_cast<ConstantFP>(V))
  1508. if (Type *T = shrinkFPConstant(CFP))
  1509. return T;
  1510. // We can only correctly find a minimum type for a scalable vector when it is
  1511. // a splat. For splats of constant values the fpext is wrapped up as a
  1512. // ConstantExpr.
  1513. if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
  1514. if (FPCExt->getOpcode() == Instruction::FPExt)
  1515. return FPCExt->getOperand(0)->getType();
  1516. // Try to shrink a vector of FP constants. This returns nullptr on scalable
  1517. // vectors
  1518. if (Type *T = shrinkFPConstantVector(V))
  1519. return T;
  1520. return V->getType();
  1521. }
  1522. /// Return true if the cast from integer to FP can be proven to be exact for all
  1523. /// possible inputs (the conversion does not lose any precision).
  1524. static bool isKnownExactCastIntToFP(CastInst &I) {
  1525. CastInst::CastOps Opcode = I.getOpcode();
  1526. assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
  1527. "Unexpected cast");
  1528. Value *Src = I.getOperand(0);
  1529. Type *SrcTy = Src->getType();
  1530. Type *FPTy = I.getType();
  1531. bool IsSigned = Opcode == Instruction::SIToFP;
  1532. int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1533. // Easy case - if the source integer type has fewer bits than the FP mantissa,
  1534. // then the cast must be exact.
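// Illustrative sketch (not part of the original comments): sitofp i16 -> double
// is always exact (15 value bits <= 53-bit significand), whereas sitofp i64 ->
// float is not provably exact here (63 value bits > 24-bit significand).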
  1535. int DestNumSigBits = FPTy->getFPMantissaWidth();
  1536. if (SrcSize <= DestNumSigBits)
  1537. return true;
  1538. // Cast from FP to integer and back to FP is independent of the intermediate
  1539. // integer width because of poison on overflow.
  1540. Value *F;
  1541. if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
  1542. // If this is uitofp (fptosi F), the source needs an extra bit to avoid
  1543. // potential rounding of negative FP input values.
  1544. int SrcNumSigBits = F->getType()->getFPMantissaWidth();
  1545. if (!IsSigned && match(Src, m_FPToSI(m_Value())))
  1546. SrcNumSigBits++;
  1547. // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
  1548. // significant bits than the destination (and make sure neither type is
  1549. // weird -- ppc_fp128).
  1550. if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
  1551. SrcNumSigBits <= DestNumSigBits)
  1552. return true;
  1553. }
  1554. // TODO:
  1555. // Try harder to find if the source integer type has less significant bits.
  1556. // For example, compute number of sign bits or compute low bit mask.
  1557. return false;
  1558. }
  1559. Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
  1560. if (Instruction *I = commonCastTransforms(FPT))
  1561. return I;
  1562. // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
  1563. // simplify this expression to avoid one or more of the trunc/extend
  1564. // operations if we can do so without changing the numerical results.
  1565. //
  1566. // The exact manner in which the widths of the operands interact to limit
  1567. // what we can and cannot do safely varies from operation to operation, and
  1568. // is explained below in the various case statements.
  1569. Type *Ty = FPT.getType();
  1570. auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
  1571. if (BO && BO->hasOneUse()) {
  1572. Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
  1573. Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
  1574. unsigned OpWidth = BO->getType()->getFPMantissaWidth();
  1575. unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
  1576. unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
  1577. unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
  1578. unsigned DstWidth = Ty->getFPMantissaWidth();
  1579. switch (BO->getOpcode()) {
  1580. default: break;
  1581. case Instruction::FAdd:
  1582. case Instruction::FSub:
  1583. // For addition and subtraction, the infinitely precise result can
  1584. // essentially be arbitrarily wide; proving that double rounding
  1585. // will not occur because the result of OpI is exact (as we will for
  1586. // FMul, for example) is hopeless. However, we *can* nonetheless
  1587. // frequently know that double rounding cannot occur (or that it is
  1588. // innocuous) by taking advantage of the specific structure of
  1589. // infinitely-precise results that admit double rounding.
  1590. //
1591. // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
  1592. // to represent both sources, we can guarantee that the double
  1593. // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
  1594. // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
  1595. // for proof of this fact).
  1596. //
  1597. // Note: Figueroa does not consider the case where DstFormat !=
  1598. // SrcFormat. It's possible (likely even!) that this analysis
  1599. // could be tightened for those cases, but they are rare (the main
  1600. // case of interest here is (float)((double)float + float)).
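// Illustrative instance (not part of the original comments): for
//   fptrunc (fadd double (fpext float %a), (fpext float %b)) to float
// OpWidth = 53 and DstWidth = SrcWidth = 24, so 53 >= 2*24+1 holds and the
// whole thing can be rewritten as fadd float %a, %b.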
  1601. if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
  1602. Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
  1603. Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
  1604. Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
  1605. RI->copyFastMathFlags(BO);
  1606. return RI;
  1607. }
  1608. break;
  1609. case Instruction::FMul:
  1610. // For multiplication, the infinitely precise result has at most
  1611. // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
  1612. // that such a value can be exactly represented, then no double
  1613. // rounding can possibly occur; we can safely perform the operation
  1614. // in the destination format if it can represent both sources.
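// Illustrative instance (not part of the original comments): for
//   fptrunc (fmul double (fpext float %a), (fpext float %b)) to float
// the product needs at most 24 + 24 = 48 significant bits, which fits in the
// 53-bit double, so it can be rewritten as fmul float %a, %b.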
  1615. if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
  1616. Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
  1617. Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
  1618. return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
  1619. }
  1620. break;
  1621. case Instruction::FDiv:
1622. // For division, we again use the bound from Figueroa's
  1623. // dissertation. I am entirely certain that this bound can be
  1624. // tightened in the unbalanced operand case by an analysis based on
  1625. // the diophantine rational approximation bound, but the well-known
  1626. // condition used here is a good conservative first pass.
  1627. // TODO: Tighten bound via rigorous analysis of the unbalanced case.
  1628. if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
  1629. Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
  1630. Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
  1631. return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
  1632. }
  1633. break;
  1634. case Instruction::FRem: {
1635. // Remainder is straightforward: it is always exact, so the
  1636. // type of OpI doesn't enter into things at all. We simply evaluate
  1637. // in whichever source type is larger, then convert to the
  1638. // destination type.
  1639. if (SrcWidth == OpWidth)
  1640. break;
  1641. Value *LHS, *RHS;
  1642. if (LHSWidth == SrcWidth) {
  1643. LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
  1644. RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
  1645. } else {
  1646. LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
  1647. RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
  1648. }
  1649. Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
  1650. return CastInst::CreateFPCast(ExactResult, Ty);
  1651. }
  1652. }
  1653. }
  1654. // (fptrunc (fneg x)) -> (fneg (fptrunc x))
  1655. Value *X;
  1656. Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
  1657. if (Op && Op->hasOneUse()) {
  1658. // FIXME: The FMF should propagate from the fptrunc, not the source op.
  1659. IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  1660. if (isa<FPMathOperator>(Op))
  1661. Builder.setFastMathFlags(Op->getFastMathFlags());
  1662. if (match(Op, m_FNeg(m_Value(X)))) {
  1663. Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
  1664. return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
  1665. }
  1666. // If we are truncating a select that has an extended operand, we can
  1667. // narrow the other operand and do the select as a narrow op.
  1668. Value *Cond, *X, *Y;
  1669. if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
  1670. X->getType() == Ty) {
1671. // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
  1672. Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
  1673. Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
  1674. return replaceInstUsesWith(FPT, Sel);
  1675. }
  1676. if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
  1677. X->getType() == Ty) {
1678. // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
  1679. Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
  1680. Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
  1681. return replaceInstUsesWith(FPT, Sel);
  1682. }
  1683. }
  1684. if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
  1685. switch (II->getIntrinsicID()) {
  1686. default: break;
  1687. case Intrinsic::ceil:
  1688. case Intrinsic::fabs:
  1689. case Intrinsic::floor:
  1690. case Intrinsic::nearbyint:
  1691. case Intrinsic::rint:
  1692. case Intrinsic::round:
  1693. case Intrinsic::roundeven:
  1694. case Intrinsic::trunc: {
  1695. Value *Src = II->getArgOperand(0);
  1696. if (!Src->hasOneUse())
  1697. break;
  1698. // Except for fabs, this transformation requires the input of the unary FP
  1699. // operation to be itself an fpext from the type to which we're
  1700. // truncating.
  1701. if (II->getIntrinsicID() != Intrinsic::fabs) {
  1702. FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
  1703. if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
  1704. break;
  1705. }
  1706. // Do unary FP operation on smaller type.
  1707. // (fptrunc (fabs x)) -> (fabs (fptrunc x))
  1708. Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
  1709. Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
  1710. II->getIntrinsicID(), Ty);
  1711. SmallVector<OperandBundleDef, 1> OpBundles;
  1712. II->getOperandBundlesAsDefs(OpBundles);
  1713. CallInst *NewCI =
  1714. CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
  1715. NewCI->copyFastMathFlags(II);
  1716. return NewCI;
  1717. }
  1718. }
  1719. }
  1720. if (Instruction *I = shrinkInsertElt(FPT, Builder))
  1721. return I;
  1722. Value *Src = FPT.getOperand(0);
  1723. if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
  1724. auto *FPCast = cast<CastInst>(Src);
  1725. if (isKnownExactCastIntToFP(*FPCast))
  1726. return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
  1727. }
  1728. return nullptr;
  1729. }
  1730. Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
  1731. // If the source operand is a cast from integer to FP and known exact, then
  1732. // cast the integer operand directly to the destination type.
  1733. Type *Ty = FPExt.getType();
  1734. Value *Src = FPExt.getOperand(0);
  1735. if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
  1736. auto *FPCast = cast<CastInst>(Src);
  1737. if (isKnownExactCastIntToFP(*FPCast))
  1738. return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
  1739. }
  1740. return commonCastTransforms(FPExt);
  1741. }
  1742. /// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
  1743. /// This is safe if the intermediate type has enough bits in its mantissa to
  1744. /// accurately represent all values of X. For example, this won't work with
  1745. /// i64 -> float -> i64.
  1746. Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
  1747. if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
  1748. return nullptr;
  1749. auto *OpI = cast<CastInst>(FI.getOperand(0));
  1750. Value *X = OpI->getOperand(0);
  1751. Type *XType = X->getType();
  1752. Type *DestType = FI.getType();
  1753. bool IsOutputSigned = isa<FPToSIInst>(FI);
  1754. // Since we can assume the conversion won't overflow, our decision as to
  1755. // whether the input will fit in the float should depend on the minimum
  1756. // of the input range and output range.
  1757. // This means this is also safe for a signed input and unsigned output, since
  1758. // a negative input would lead to undefined behavior.
  1759. if (!isKnownExactCastIntToFP(*OpI)) {
  1760. // The first cast may not round exactly based on the source integer width
  1761. // and FP width, but the overflow UB rules can still allow this to fold.
  1762. // If the destination type is narrow, that means the intermediate FP value
  1763. // must be large enough to hold the source value exactly.
1764. // For example, (uint8_t)((float)(uint32_t)16777217) is undefined behavior.
  1765. int OutputSize = (int)DestType->getScalarSizeInBits() - IsOutputSigned;
  1766. if (OutputSize > OpI->getType()->getFPMantissaWidth())
  1767. return nullptr;
  1768. }
  1769. if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
  1770. bool IsInputSigned = isa<SIToFPInst>(OpI);
  1771. if (IsInputSigned && IsOutputSigned)
  1772. return new SExtInst(X, DestType);
  1773. return new ZExtInst(X, DestType);
  1774. }
  1775. if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
  1776. return new TruncInst(X, DestType);
  1777. assert(XType == DestType && "Unexpected types for int to FP to int casts");
  1778. return replaceInstUsesWith(FI, X);
  1779. }
  1780. Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
  1781. if (Instruction *I = foldItoFPtoI(FI))
  1782. return I;
  1783. return commonCastTransforms(FI);
  1784. }
  1785. Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
  1786. if (Instruction *I = foldItoFPtoI(FI))
  1787. return I;
  1788. return commonCastTransforms(FI);
  1789. }
  1790. Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
  1791. return commonCastTransforms(CI);
  1792. }
  1793. Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
  1794. return commonCastTransforms(CI);
  1795. }
  1796. Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
  1797. // If the source integer type is not the intptr_t type for this target, do a
  1798. // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
  1799. // cast to be exposed to other transforms.
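// Illustrative sketch (not part of the original comments), assuming 64-bit
// pointers in this address space:
//   %p = inttoptr i32 %x to i8*
//     --> %w = zext i32 %x to i64
//         %p = inttoptr i64 %w to i8*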
  1800. unsigned AS = CI.getAddressSpace();
  1801. if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
  1802. DL.getPointerSizeInBits(AS)) {
  1803. Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
  1804. DL.getIntPtrType(CI.getContext(), AS));
  1805. Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
  1806. return new IntToPtrInst(P, CI.getType());
  1807. }
  1808. if (Instruction *I = commonCastTransforms(CI))
  1809. return I;
  1810. return nullptr;
  1811. }
  1812. /// Implement the transforms for cast of pointer (bitcast/ptrtoint)
  1813. Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) {
  1814. Value *Src = CI.getOperand(0);
  1815. if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
  1816. // If casting the result of a getelementptr instruction with no offset, turn
  1817. // this into a cast of the original pointer!
  1818. if (GEP->hasAllZeroIndices() &&
1819. // If CI is an addrspacecast and GEP changes the pointer type, merging
  1820. // GEP into CI would undo canonicalizing addrspacecast with different
  1821. // pointer types, causing infinite loops.
  1822. (!isa<AddrSpaceCastInst>(CI) ||
  1823. GEP->getType() == GEP->getPointerOperandType())) {
  1824. // Changing the cast operand is usually not a good idea but it is safe
  1825. // here because the pointer operand is being replaced with another
  1826. // pointer operand so the opcode doesn't need to change.
  1827. return replaceOperand(CI, 0, GEP->getOperand(0));
  1828. }
  1829. }
  1830. return commonCastTransforms(CI);
  1831. }
  1832. Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
  1833. // If the destination integer type is not the intptr_t type for this target,
  1834. // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
  1835. // to be exposed to other transforms.
  1836. Value *SrcOp = CI.getPointerOperand();
  1837. Type *SrcTy = SrcOp->getType();
  1838. Type *Ty = CI.getType();
  1839. unsigned AS = CI.getPointerAddressSpace();
  1840. unsigned TySize = Ty->getScalarSizeInBits();
  1841. unsigned PtrSize = DL.getPointerSizeInBits(AS);
  1842. if (TySize != PtrSize) {
  1843. Type *IntPtrTy =
  1844. SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
  1845. Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
  1846. return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
  1847. }
  1848. if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
  1849. // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
  1850. // While this can increase the number of instructions it doesn't actually
  1851. // increase the overall complexity since the arithmetic is just part of
  1852. // the GEP otherwise.
  1853. if (GEP->hasOneUse() &&
  1854. isa<ConstantPointerNull>(GEP->getPointerOperand())) {
  1855. return replaceInstUsesWith(CI,
  1856. Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
  1857. /*isSigned=*/false));
  1858. }
  1859. }
  1860. Value *Vec, *Scalar, *Index;
  1861. if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
  1862. m_Value(Scalar), m_Value(Index)))) &&
  1863. Vec->getType() == Ty) {
  1864. assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
  1865. // Convert the scalar to int followed by insert to eliminate one cast:
1866. // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
  1867. Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
  1868. return InsertElementInst::Create(Vec, NewCast, Index);
  1869. }
  1870. return commonPointerCastTransforms(CI);
  1871. }
  1872. /// This input value (which is known to have vector type) is being zero extended
  1873. /// or truncated to the specified vector type. Since the zext/trunc is done
  1874. /// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
  1875. /// endianness will impact which end of the vector that is extended or
  1876. /// truncated.
  1877. ///
  1878. /// A vector is always stored with index 0 at the lowest address, which
  1879. /// corresponds to the most significant bits for a big endian stored integer and
  1880. /// the least significant bits for little endian. A trunc/zext of an integer
  1881. /// impacts the big end of the integer. Thus, we need to add/remove elements at
  1882. /// the front of the vector for big endian targets, and the back of the vector
  1883. /// for little endian targets.
  1884. ///
  1885. /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
  1886. ///
  1887. /// The source and destination vector types may have different element types.
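/// Illustrative sketch (not part of the original comment): shrinking
/// <4 x i16> to <2 x i16> (an integer truncate i64 -> i32 in disguise) keeps
/// the low-addressed elements on little endian,
///   shufflevector <4 x i16> %v, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
/// and elements 2 and 3 on big endian.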
  1888. static Instruction *
  1889. optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
  1890. InstCombinerImpl &IC) {
  1891. // We can only do this optimization if the output is a multiple of the input
  1892. // element size, or the input is a multiple of the output element size.
  1893. // Convert the input type to have the same element type as the output.
  1894. VectorType *SrcTy = cast<VectorType>(InVal->getType());
  1895. if (SrcTy->getElementType() != DestTy->getElementType()) {
  1896. // The input types don't need to be identical, but for now they must be the
  1897. // same size. There is no specific reason we couldn't handle things like
  1898. // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
  1899. // there yet.
  1900. if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
  1901. DestTy->getElementType()->getPrimitiveSizeInBits())
  1902. return nullptr;
  1903. SrcTy =
  1904. FixedVectorType::get(DestTy->getElementType(),
  1905. cast<FixedVectorType>(SrcTy)->getNumElements());
  1906. InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
  1907. }
  1908. bool IsBigEndian = IC.getDataLayout().isBigEndian();
  1909. unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
  1910. unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
  1911. assert(SrcElts != DestElts && "Element counts should be different.");
  1912. // Now that the element types match, get the shuffle mask and RHS of the
  1913. // shuffle to use, which depends on whether we're increasing or decreasing the
  1914. // size of the input.
  1915. SmallVector<int, 16> ShuffleMaskStorage;
  1916. ArrayRef<int> ShuffleMask;
  1917. Value *V2;
1918. // Produce an identity shuffle mask for the src vector.
  1919. ShuffleMaskStorage.resize(SrcElts);
  1920. std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
  1921. if (SrcElts > DestElts) {
  1922. // If we're shrinking the number of elements (rewriting an integer
  1923. // truncate), just shuffle in the elements corresponding to the least
  1924. // significant bits from the input and use poison as the second shuffle
  1925. // input.
  1926. V2 = PoisonValue::get(SrcTy);
  1927. // Make sure the shuffle mask selects the "least significant bits" by
1928. // keeping elements from the back of the src vector for big endian, and from the
  1929. // front for little endian.
  1930. ShuffleMask = ShuffleMaskStorage;
  1931. if (IsBigEndian)
  1932. ShuffleMask = ShuffleMask.take_back(DestElts);
  1933. else
  1934. ShuffleMask = ShuffleMask.take_front(DestElts);
  1935. } else {
  1936. // If we're increasing the number of elements (rewriting an integer zext),
  1937. // shuffle in all of the elements from InVal. Fill the rest of the result
  1938. // elements with zeros from a constant zero.
  1939. V2 = Constant::getNullValue(SrcTy);
  1940. // Use first elt from V2 when indicating zero in the shuffle mask.
  1941. uint32_t NullElt = SrcElts;
  1942. // Extend with null values in the "most significant bits" by adding elements
  1943. // in front of the src vector for big endian, and at the back for little
  1944. // endian.
  1945. unsigned DeltaElts = DestElts - SrcElts;
  1946. if (IsBigEndian)
  1947. ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
  1948. else
  1949. ShuffleMaskStorage.append(DeltaElts, NullElt);
  1950. ShuffleMask = ShuffleMaskStorage;
  1951. }
  1952. return new ShuffleVectorInst(InVal, V2, ShuffleMask);
  1953. }
  1954. static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
  1955. return Value % Ty->getPrimitiveSizeInBits() == 0;
  1956. }
  1957. static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
  1958. return Value / Ty->getPrimitiveSizeInBits();
  1959. }
  1960. /// V is a value which is inserted into a vector of VecEltTy.
  1961. /// Look through the value to see if we can decompose it into
  1962. /// insertions into the vector. See the example in the comment for
  1963. /// OptimizeIntegerToVectorInsertions for the pattern this handles.
  1964. /// The type of V is always a non-zero multiple of VecEltTy's size.
  1965. /// Shift is the number of bits between the lsb of V and the lsb of
  1966. /// the vector.
  1967. ///
  1968. /// This returns false if the pattern can't be matched or true if it can,
  1969. /// filling in Elements with the elements found here.
  1970. static bool collectInsertionElements(Value *V, unsigned Shift,
  1971. SmallVectorImpl<Value *> &Elements,
  1972. Type *VecEltTy, bool isBigEndian) {
  1973. assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
  1974. "Shift should be a multiple of the element type size");
  1975. // Undef values never contribute useful bits to the result.
  1976. if (isa<UndefValue>(V)) return true;
  1977. // If we got down to a value of the right type, we win, try inserting into the
  1978. // right element.
  1979. if (V->getType() == VecEltTy) {
  1980. // Inserting null doesn't actually insert any elements.
  1981. if (Constant *C = dyn_cast<Constant>(V))
  1982. if (C->isNullValue())
  1983. return true;
  1984. unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
  1985. if (isBigEndian)
  1986. ElementIndex = Elements.size() - ElementIndex - 1;
  1987. // Fail if multiple elements are inserted into this slot.
  1988. if (Elements[ElementIndex])
  1989. return false;
  1990. Elements[ElementIndex] = V;
  1991. return true;
  1992. }
  1993. if (Constant *C = dyn_cast<Constant>(V)) {
  1994. // Figure out the # elements this provides, and bitcast it or slice it up
  1995. // as required.
  1996. unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
  1997. VecEltTy);
  1998. // If the constant is the size of a vector element, we just need to bitcast
  1999. // it to the right type so it gets properly inserted.
  2000. if (NumElts == 1)
  2001. return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
  2002. Shift, Elements, VecEltTy, isBigEndian);
  2003. // Okay, this is a constant that covers multiple elements. Slice it up into
  2004. // pieces and insert each element-sized piece into the vector.
  2005. if (!isa<IntegerType>(C->getType()))
  2006. C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
  2007. C->getType()->getPrimitiveSizeInBits()));
  2008. unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
  2009. Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
  2010. for (unsigned i = 0; i != NumElts; ++i) {
  2011. unsigned ShiftI = Shift+i*ElementSize;
  2012. Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
  2013. ShiftI));
  2014. Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
  2015. if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
  2016. isBigEndian))
  2017. return false;
  2018. }
  2019. return true;
  2020. }
  2021. if (!V->hasOneUse()) return false;
  2022. Instruction *I = dyn_cast<Instruction>(V);
  2023. if (!I) return false;
  2024. switch (I->getOpcode()) {
  2025. default: return false; // Unhandled case.
  2026. case Instruction::BitCast:
  2027. return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
  2028. isBigEndian);
  2029. case Instruction::ZExt:
  2030. if (!isMultipleOfTypeSize(
  2031. I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
  2032. VecEltTy))
  2033. return false;
  2034. return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
  2035. isBigEndian);
  2036. case Instruction::Or:
  2037. return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
  2038. isBigEndian) &&
  2039. collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
  2040. isBigEndian);
  2041. case Instruction::Shl: {
  2042. // Must be shifting by a constant that is a multiple of the element size.
  2043. ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
  2044. if (!CI) return false;
  2045. Shift += CI->getZExtValue();
  2046. if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
  2047. return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
  2048. isBigEndian);
  2049. }
  2050. }
  2051. }
  2052. /// If the input is an 'or' instruction, we may be doing shifts and ors to
  2053. /// assemble the elements of the vector manually.
  2054. /// Try to rip the code out and replace it with insertelements. This is to
  2055. /// optimize code like this:
  2056. ///
  2057. /// %tmp37 = bitcast float %inc to i32
  2058. /// %tmp38 = zext i32 %tmp37 to i64
  2059. /// %tmp31 = bitcast float %inc5 to i32
  2060. /// %tmp32 = zext i32 %tmp31 to i64
  2061. /// %tmp33 = shl i64 %tmp32, 32
  2062. /// %ins35 = or i64 %tmp33, %tmp38
  2063. /// %tmp43 = bitcast i64 %ins35 to <2 x float>
  2064. ///
  2065. /// Into two insertelements that do "buildvector{%inc, %inc5}".
  2066. static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
  2067. InstCombinerImpl &IC) {
  2068. auto *DestVecTy = cast<FixedVectorType>(CI.getType());
  2069. Value *IntInput = CI.getOperand(0);
  2070. SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
  2071. if (!collectInsertionElements(IntInput, 0, Elements,
  2072. DestVecTy->getElementType(),
  2073. IC.getDataLayout().isBigEndian()))
  2074. return nullptr;
2075. // If we succeeded, we know that all of the elements are specified by Elements
  2076. // or are zero if Elements has a null entry. Recast this as a set of
  2077. // insertions.
  2078. Value *Result = Constant::getNullValue(CI.getType());
  2079. for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
  2080. if (!Elements[i]) continue; // Unset element.
  2081. Result = IC.Builder.CreateInsertElement(Result, Elements[i],
  2082. IC.Builder.getInt32(i));
  2083. }
  2084. return Result;
  2085. }
  2086. /// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
  2087. /// vector followed by extract element. The backend tends to handle bitcasts of
  2088. /// vectors better than bitcasts of scalars because vector registers are
  2089. /// usually not type-specific like scalar integer or scalar floating-point.
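/// Illustrative sketch (not part of the original comment):
///   bitcast (extractelement <4 x i32> %v, i64 1) to float
///     --> extractelement (bitcast <4 x i32> %v to <4 x float>), i64 1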
static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
                                              InstCombinerImpl &IC) {
  // TODO: Create and use a pattern matcher for ExtractElementInst.
  auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
  if (!ExtElt || !ExtElt->hasOneUse())
    return nullptr;

  // The bitcast must be to a vectorizable type, otherwise we can't make a new
  // type to extract from.
  Type *DestType = BitCast.getType();
  if (!VectorType::isValidElementType(DestType))
    return nullptr;

  auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType());
  auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(),
                                         NewVecType, "bc");
  return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
}

/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
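///
/// For example (illustrative IR; the concrete types are only assumptions):
///
///    %bc  = bitcast <2 x i32> %x to <4 x i16>
///    %and = and <4 x i16> %bc, %y
///    %res = bitcast <4 x i16> %and to <2 x i32>
///  ==>
///    %bcy = bitcast <4 x i16> %y to <2 x i32>
///    %res = and <2 x i32> %x, %bcy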
static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
                                            InstCombiner::BuilderTy &Builder) {
  Type *DestTy = BitCast.getType();
  BinaryOperator *BO;

  if (!DestTy->isIntOrIntVectorTy() ||
      !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
      !BO->isBitwiseLogicOp())
    return nullptr;

  // FIXME: This transform is restricted to vector types to avoid backend
  // problems caused by creating potentially illegal operations. If a fix-up is
  // added to handle that situation, we can remove this check.
  if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
    return nullptr;

  Value *X;
  if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType() == DestTy && !isa<Constant>(X)) {
    // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
    Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
    return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
  }

  if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType() == DestTy && !isa<Constant>(X)) {
    // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
    Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
    return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
  }

  // Canonicalize vector bitcasts to come before vector bitwise logic with a
  // constant. This eases recognition of special constants for later ops.
  // Example:
  // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
  Constant *C;
  if (match(BO->getOperand(1), m_Constant(C))) {
    // bitcast (logic X, C) --> logic (bitcast X, C')
    Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
    Value *CastedC = Builder.CreateBitCast(C, DestTy);
    return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
  }

  return nullptr;
}

/// Change the type of a select if we can eliminate a bitcast.
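///
/// A sketch of the pattern this handles (illustrative IR only):
///
///    %bc  = bitcast <4 x i32> %x to <4 x float>
///    %sel = select i1 %cond, <4 x float> %bc, <4 x float> %y
///    %res = bitcast <4 x float> %sel to <4 x i32>
///  ==>
///    %bcy = bitcast <4 x float> %y to <4 x i32>
///    %res = select i1 %cond, <4 x i32> %x, <4 x i32> %bcy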
static Instruction *foldBitCastSelect(BitCastInst &BitCast,
                                      InstCombiner::BuilderTy &Builder) {
  Value *Cond, *TVal, *FVal;
  if (!match(BitCast.getOperand(0),
             m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
    return nullptr;

  // A vector select must maintain the same number of elements in its operands.
  Type *CondTy = Cond->getType();
  Type *DestTy = BitCast.getType();
  if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
    if (!DestTy->isVectorTy() ||
        CondVTy->getElementCount() !=
            cast<VectorType>(DestTy)->getElementCount())
      return nullptr;

  // FIXME: This transform is restricted from changing the select between
  // scalars and vectors to avoid backend problems caused by creating
  // potentially illegal operations. If a fix-up is added to handle that
  // situation, we can remove this check.
  if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
    return nullptr;

  auto *Sel = cast<Instruction>(BitCast.getOperand(0));
  Value *X;
  if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
      !isa<Constant>(X)) {
    // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
    Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
    return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
  }

  if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
      !isa<Constant>(X)) {
    // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
    Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
    return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
  }

  return nullptr;
}

/// Check if all users of CI are StoreInsts.
static bool hasStoreUsersOnly(CastInst &CI) {
  for (User *U : CI.users()) {
    if (!isa<StoreInst>(U))
      return false;
  }
  return true;
}

/// This function handles the following case:
///
///     A -> B    cast
///     PHI
///     B -> A    cast
///
/// All the related PHI nodes can be replaced by new PHI nodes with type A.
/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
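///
/// A minimal sketch of the shape this matches (illustrative IR; the names,
/// types, and block structure are only assumptions):
///
///    entry:
///      %b1  = bitcast double %a1 to i64        ; A -> B
///      br label %merge
///    merge:
///      %phi = phi i64 [ %b1, %entry ], [ %b2, %other ]
///      %a   = bitcast i64 %phi to double       ; B -> A
///
/// which becomes a PHI of double values, with the surrounding bitcasts gone.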
Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
                                                      PHINode *PN) {
  // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
  if (hasStoreUsersOnly(CI))
    return nullptr;

  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType();  // Type B
  Type *DestTy = CI.getType();   // Type A

  SmallVector<PHINode *, 4> PhiWorklist;
  SmallSetVector<PHINode *, 4> OldPhiNodes;

  // Find all of the A->B casts and PHI nodes.
  // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
  // OldPhiNodes is used to track all known PHI nodes; before adding a new
  // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
  PhiWorklist.push_back(PN);
  OldPhiNodes.insert(PN);
  while (!PhiWorklist.empty()) {
    auto *OldPN = PhiWorklist.pop_back_val();
    for (Value *IncValue : OldPN->incoming_values()) {
      if (isa<Constant>(IncValue))
        continue;

      if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
        // If there is a sequence of one or more load instructions, where each
        // loaded value is used as the address of a later load, a bitcast is
        // necessary to change the value type, so don't optimize it. For
        // simplicity we give up if the load address comes from another load.
        Value *Addr = LI->getOperand(0);
        if (Addr == &CI || isa<LoadInst>(Addr))
          return nullptr;
        // Don't transform "load <256 x i32>, <256 x i32>*" to
        // "load x86_amx, x86_amx*", because x86_amx* is invalid.
        // TODO: Remove this check when bitcast between vector and x86_amx
        // is replaced with a specific intrinsic.
        if (DestTy->isX86_AMXTy())
          return nullptr;
        if (LI->hasOneUse() && LI->isSimple())
          continue;
        // If a LoadInst has more than one use, changing the type of loaded
        // value may create another bitcast.
        return nullptr;
      }

      if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
        if (OldPhiNodes.insert(PNode))
          PhiWorklist.push_back(PNode);
        continue;
      }

      auto *BCI = dyn_cast<BitCastInst>(IncValue);
      // We can't handle other instructions.
      if (!BCI)
        return nullptr;

      // Verify it's an A->B cast.
      Type *TyA = BCI->getOperand(0)->getType();
      Type *TyB = BCI->getType();
      if (TyA != DestTy || TyB != SrcTy)
        return nullptr;
    }
  }

  // Check that each user of each old PHI node is something that we can
  // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
  for (auto *OldPN : OldPhiNodes) {
    for (User *V : OldPN->users()) {
      if (auto *SI = dyn_cast<StoreInst>(V)) {
        if (!SI->isSimple() || SI->getOperand(0) != OldPN)
          return nullptr;
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        // Verify it's a B->A cast.
        Type *TyB = BCI->getOperand(0)->getType();
        Type *TyA = BCI->getType();
        if (TyA != DestTy || TyB != SrcTy)
          return nullptr;
      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
        // As long as the user is another old PHI node, then even if we don't
        // rewrite it, the PHI web we're considering won't have any users
        // outside itself, so it'll be dead.
        if (!OldPhiNodes.contains(PHI))
          return nullptr;
      } else {
        return nullptr;
      }
    }
  }

  // For each old PHI node, create a corresponding new PHI node with type A.
  SmallDenseMap<PHINode *, PHINode *> NewPNodes;
  for (auto *OldPN : OldPhiNodes) {
    Builder.SetInsertPoint(OldPN);
    PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
    NewPNodes[OldPN] = NewPN;
  }

  // Fill in the operands of new PHI nodes.
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
      Value *V = OldPN->getOperand(j);
      Value *NewV = nullptr;
      if (auto *C = dyn_cast<Constant>(V)) {
        NewV = ConstantExpr::getBitCast(C, DestTy);
      } else if (auto *LI = dyn_cast<LoadInst>(V)) {
        // Explicitly perform load combine to make sure no opposing transform
        // can remove the bitcast in the meantime and trigger an infinite loop.
        Builder.SetInsertPoint(LI);
        NewV = combineLoadToNewType(*LI, DestTy);
        // Remove the old load and its use in the old phi, which itself becomes
        // dead once the whole transform finishes.
        replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
        eraseInstFromFunction(*LI);
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        NewV = BCI->getOperand(0);
      } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
        NewV = NewPNodes[PrevPN];
      }
      assert(NewV);
      NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
    }
  }

  // Traverse all accumulated PHI nodes and process their users,
  // which are Stores and BitCasts. Without this processing
  // NewPHI nodes could be replicated and could lead to extra
  // moves generated after DeSSA.
  // If there is a store with type B, change it to type A.
  // Replace users of BitCast B->A with NewPHI. These will help
  // later to get rid of a closure formed by OldPHI nodes.
  Instruction *RetVal = nullptr;
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (User *V : make_early_inc_range(OldPN->users())) {
      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(SI->isSimple() && SI->getOperand(0) == OldPN);
        Builder.SetInsertPoint(SI);
        auto *NewBC =
            cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
        SI->setOperand(0, NewBC);
        Worklist.push(SI);
        assert(hasStoreUsersOnly(*NewBC));
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        Type *TyB = BCI->getOperand(0)->getType();
        Type *TyA = BCI->getType();
        assert(TyA == DestTy && TyB == SrcTy);
        (void) TyA;
        (void) TyB;
        Instruction *I = replaceInstUsesWith(*BCI, NewPN);
        if (BCI == &CI)
          RetVal = I;
      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
        assert(OldPhiNodes.contains(PHI));
        (void) PHI;
      } else {
        llvm_unreachable("all uses should be handled");
      }
    }
  }

  return RetVal;
}

static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
                                        const DataLayout &DL) {
  Value *Src = CI.getOperand(0);
  PointerType *SrcPTy = cast<PointerType>(Src->getType());
  PointerType *DstPTy = cast<PointerType>(CI.getType());

  // Bitcasts involving opaque pointers cannot be converted into a GEP.
  if (SrcPTy->isOpaque() || DstPTy->isOpaque())
    return nullptr;

  Type *DstElTy = DstPTy->getNonOpaquePointerElementType();
  Type *SrcElTy = SrcPTy->getNonOpaquePointerElementType();

  // When the type pointed to is not sized the cast cannot be
  // turned into a gep.
  if (!SrcElTy->isSized())
    return nullptr;

  // If the source and destination are pointers, and this cast is equivalent
  // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
  // This can enhance SROA and other transforms that want type-safe pointers.
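  //
  // For example (illustrative typed-pointer IR, not from an actual test):
  //    bitcast [4 x float]* %p to float*
  //  ==>
  //    getelementptr [4 x float], [4 x float]* %p, i32 0, i32 0
  // Descending from [4 x float] through index 0 reaches float, so NumZeros
  // becomes 1 and two zero indices are emitted; "inbounds" is only added
  // below when %p is known to be dereferenceable.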
  unsigned NumZeros = 0;
  while (SrcElTy && SrcElTy != DstElTy) {
    SrcElTy = GetElementPtrInst::getTypeAtIndex(SrcElTy, (uint64_t)0);
    ++NumZeros;
  }

  // If we found a path from the src to dest, create the getelementptr now.
  if (SrcElTy == DstElTy) {
    SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
    GetElementPtrInst *GEP = GetElementPtrInst::Create(
        SrcPTy->getNonOpaquePointerElementType(), Src, Idxs);

    // If the source pointer is dereferenceable, then assume it points to an
    // allocated object and apply "inbounds" to the GEP.
    bool CanBeNull, CanBeFreed;
    if (Src->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed)) {
      // In a non-default address space (not 0), a null pointer can not be
      // assumed inbounds, so ignore that case (dereferenceable_or_null).
      // The reason is that 'null' is not treated differently in these address
      // spaces, and we consequently ignore the 'gep inbounds' special case
      // for 'null' which allows 'inbounds' on 'null' if the indices are
      // zeros.
      if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
        GEP->setIsInBounds();
    }
    return GEP;
  }
  return nullptr;
}

Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
  // If the operands are integer typed then apply the integer transforms,
  // otherwise just apply the common ones.
  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType();
  Type *DestTy = CI.getType();

  // Get rid of casts from one type to the same type. These are useless and can
  // be replaced by the operand.
  if (DestTy == Src->getType())
    return replaceInstUsesWith(CI, Src);

  if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
    // If we are casting an alloca to a pointer to a type of the same
    // size, rewrite the allocation instruction to allocate the "right" type.
    // There is no need to modify malloc calls because it is their bitcast that
    // needs to be cleaned up.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
        return V;

    if (Instruction *I = convertBitCastToGEP(CI, Builder, DL))
      return I;
  }

  if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
    // Beware: messing with this target-specific oddity may cause trouble.
    if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
      Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
      return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
    }

    if (isa<IntegerType>(SrcTy)) {
      // If this is a cast from an integer to vector, check to see if the input
      // is a trunc or zext of a bitcast from vector. If so, we can replace all
      // the casts with a shuffle and (potentially) a bitcast.
      if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
        CastInst *SrcCast = cast<CastInst>(Src);
        if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
          if (isa<VectorType>(BCIn->getOperand(0)->getType()))
            if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
                    BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
              return I;
      }

      // If the input is an 'or' instruction, we may be doing shifts and ors to
      // assemble the elements of the vector manually. Try to rip the code out
      // and replace it with insertelements.
      if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
        return replaceInstUsesWith(CI, V);
    }
  }

  if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
    if (SrcVTy->getNumElements() == 1) {
      // If our destination is not a vector, then make this a straight
      // scalar-scalar cast.
      if (!DestTy->isVectorTy()) {
        Value *Elem =
          Builder.CreateExtractElement(Src,
                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
        return CastInst::Create(Instruction::BitCast, Elem, DestTy);
      }

      // Otherwise, see if our source is an insert. If so, then use the scalar
      // component directly:
      // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
      if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
        return new BitCastInst(InsElt->getOperand(1), DestTy);
    }

    // Convert an artificial vector insert into more analyzable bitwise logic.
    unsigned BitWidth = DestTy->getScalarSizeInBits();
    Value *X, *Y;
    uint64_t IndexC;
    if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
                                        m_Value(Y), m_ConstantInt(IndexC)))) &&
        DestTy->isIntegerTy() && X->getType() == DestTy &&
        Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
      // Adjust for big endian - the LSBs are at the high index.
      if (DL.isBigEndian())
        IndexC = SrcVTy->getNumElements() - 1 - IndexC;

      // We only handle (endian-normalized) insert to index 0. Any other insert
      // would require a left-shift, so that is an extra instruction.
      if (IndexC == 0) {
        // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
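        //
        // Concretely (illustrative little-endian IR; i64/<2 x i32> assumed):
        //    %v  = bitcast i64 %X to <2 x i32>
        //    %in = insertelement <2 x i32> %v, i32 %Y, i64 0
        //    %r  = bitcast <2 x i32> %in to i64
        //  ==>
        //    %hi = and i64 %X, -4294967296    ; keep the untouched high lane
        //    %lo = zext i32 %Y to i64
        //    %r  = or i64 %hi, %lo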
        unsigned EltWidth = Y->getType()->getScalarSizeInBits();
        APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
        Value *AndX = Builder.CreateAnd(X, MaskC);
        Value *ZextY = Builder.CreateZExt(Y, DestTy);
        return BinaryOperator::CreateOr(AndX, ZextY);
      }
    }
  }

  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
    // Okay, we have (bitcast (shuffle ..)). Check to see if this is
    // a bitcast to a vector with the same # elts.
    Value *ShufOp0 = Shuf->getOperand(0);
    Value *ShufOp1 = Shuf->getOperand(1);
    auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
    auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
    if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
        cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
        ShufElts == SrcVecElts) {
      BitCastInst *Tmp;
      // If either of the operands is a cast from CI.getType(), then
      // evaluating the shuffle in the casted destination's type will allow
      // us to eliminate at least one cast.
      if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
           Tmp->getOperand(0)->getType() == DestTy) ||
          ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
           Tmp->getOperand(0)->getType() == DestTy)) {
        Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
        Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
        // Return a new shuffle vector. Use the same element IDs, as we
        // know the vector types match #elts.
        return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
      }
    }

    // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as
    // a byte-swap:
    // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) --> bswap (bitcast X)
    // TODO: We should match the related pattern for bitreverse.
    if (DestTy->isIntegerTy() &&
        DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
        SrcTy->getScalarSizeInBits() == 8 &&
        ShufElts.getKnownMinValue() % 2 == 0 && Shuf->hasOneUse() &&
        Shuf->isReverse()) {
      assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
      assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
      Function *Bswap =
          Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy);
      Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
      return CallInst::Create(Bswap, { ScalarX });
    }
  }

  // Handle the A->B->A cast pattern when there is an intervening PHI node.
  if (PHINode *PN = dyn_cast<PHINode>(Src))
    if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
      return I;

  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
    return I;

  if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
    return I;

  if (Instruction *I = foldBitCastSelect(CI, Builder))
    return I;

  if (SrcTy->isPointerTy())
    return commonPointerCastTransforms(CI);

  return commonCastTransforms(CI);
}

Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
  // If the destination pointer element type is not the same as the source's,
  // first do a bitcast to the destination type, and then the addrspacecast.
  // This allows the cast to be exposed to other transforms.
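  //
  // For example (illustrative typed-pointer IR; types are only assumptions):
  //    addrspacecast i32 addrspace(1)* %p to i8 addrspace(2)*
  //  ==>
  //    %b = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
  //    addrspacecast i8 addrspace(1)* %b to i8 addrspace(2)*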
  Value *Src = CI.getOperand(0);
  PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
  PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());

  if (!SrcTy->hasSameElementTypeAs(DestTy)) {
    Type *MidTy =
        PointerType::getWithSamePointeeType(DestTy, SrcTy->getAddressSpace());
    // Handle vectors of pointers.
    if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
      MidTy = VectorType::get(MidTy, VT->getElementCount());

    Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
    return new AddrSpaceCastInst(NewBitCast, CI.getType());
  }

  return commonPointerCastTransforms(CI);
}