//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains logic for simplifying instructions based on information
// about how they are used.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
                                   const APInt &Demanded) {
  assert(I && "No instruction?");
  assert(OpNo < I->getNumOperands() && "Operand index too large");

  // The operand must be a constant integer or splat integer.
  Value *Op = I->getOperand(OpNo);
  const APInt *C;
  if (!match(Op, m_APInt(C)))
    return false;

  // If there are no bits set that aren't demanded, nothing to do.
  if (C->isSubsetOf(Demanded))
    return false;

  // This instruction is producing bits that are not demanded. Shrink the RHS.
  I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));

  return true;
}
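
// Illustrative sketch (hypothetical IR, editorial example): given
//   %r = or i16 %x, 0xF0F0
// with Demanded = 0x00FF, the constant has set bits outside the demanded
// mask, so operand 1 is shrunk to 0xF0F0 & 0x00FF = 0x00F0 and the routine
// reports that it changed the instruction.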

/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
/// the instruction has any properties that allow us to simplify its operands.
bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
  KnownBits Known(BitWidth);
  APInt DemandedMask(APInt::getAllOnes(BitWidth));

  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
                                     0, &Inst);
  if (!V) return false;
  if (V == &Inst) return true;
  replaceInstUsesWith(Inst, V);
  return true;
}

/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
                                            const APInt &DemandedMask,
                                            KnownBits &Known, unsigned Depth) {
  Use &U = I->getOperandUse(OpNo);
  Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known,
                                          Depth, I);
  if (!NewVal) return false;
  if (Instruction *OpInst = dyn_cast<Instruction>(U))
    salvageDebugInfo(*OpInst);

  replaceUse(U, NewVal);
  return true;
}

/// This function attempts to replace V with a simpler value based on the
/// demanded bits. When this function is called, it is known that only the bits
/// set in DemandedMask of the result of V are ever used downstream.
/// Consequently, depending on the mask and V, it may be possible to replace V
/// with a constant or one of its operands. In such cases, this function does
/// the replacement and returns true. In all other cases, it returns false
/// after analyzing the expression, setting Known.One to the bits known to be
/// one in the expression and Known.Zero to the bits known to be zero. These
/// are provided to potentially allow the caller (which might recursively be
/// SimplifyDemandedBits itself) to simplify the expression.
/// Known.One and Known.Zero always follow the invariant that:
///   Known.One & Known.Zero == 0.
/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and
/// Known.Zero may only be accurate for those bits set in DemandedMask. Note
/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must
/// all be the same.
///
/// This returns null if it did not change anything and it permits no
/// simplification. This returns V itself if it did some simplification of V's
/// operands based on the information about what bits are demanded. This
/// returns some other non-null value if it found out that V is equal to
/// another value in the context where the specified bits are demanded, but not
/// for all users.
Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
                                                 KnownBits &Known,
                                                 unsigned Depth,
                                                 Instruction *CxtI) {
  assert(V != nullptr && "Null pointer of Value???");
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
  uint32_t BitWidth = DemandedMask.getBitWidth();
  Type *VTy = V->getType();
  assert(
      (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
      Known.getBitWidth() == BitWidth &&
      "Value *V, DemandedMask and Known must have same BitWidth");

  if (isa<Constant>(V)) {
    computeKnownBits(V, Known, Depth, CxtI);
    return nullptr;
  }

  Known.resetAll();
  if (DemandedMask.isZero()) // Not demanding any bits from V.
    return UndefValue::get(VTy);

  if (Depth == MaxAnalysisRecursionDepth)
    return nullptr;

  if (isa<ScalableVectorType>(VTy))
    return nullptr;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) {
    computeKnownBits(V, Known, Depth, CxtI);
    return nullptr; // Only analyze instructions.
  }

  // If there are multiple uses of this value and we aren't at the root, then
  // we can't do any simplifications of the operands, because DemandedMask
  // only reflects the bits demanded by *one* of the users.
  if (Depth != 0 && !I->hasOneUse())
    return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI);

  KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);

  // If this is the root being simplified, allow it to have multiple uses,
  // just set the DemandedMask to all bits so that we can try to simplify the
  // operands. This allows visitTruncInst (for example) to simplify the
  // operand of a trunc without duplicating all the logic below.
  if (Depth == 0 && !V->hasOneUse())
    DemandedMask.setAllBits();

  switch (I->getOpcode()) {
  default:
    computeKnownBits(I, Known, Depth, CxtI);
    break;
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown,
                             Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    Known = LHSKnown & RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
      return I;

    break;
  }
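  // Illustrative example for the 'and' rules above (hypothetical IR): with
  //   %r = and i32 %x, 0xFFFF00FF
  // and DemandedMask = 0x000000FF, every demanded bit is known one on the
  // RHS, so the 'and' contributes nothing to the demanded bits and %x is
  // returned in its place.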
  case Instruction::Or: {
    // If either the LHS or the RHS are One, the result is One.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
                             Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    Known = LHSKnown | RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask))
      return I;

    break;
  }
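  // Illustrative example (hypothetical IR): with
  //   %r = or i32 %x, 0xFFFF0000
  // and DemandedMask = 0x000000FF, all demanded bits are known zero on the
  // RHS, so the 'or' is bypassed and %x is returned directly.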
  case Instruction::Xor: {
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
      return I;
    Value *LHS, *RHS;
    if (DemandedMask == 1 &&
        match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) &&
        match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) {
      // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1
      IRBuilderBase::InsertPointGuard Guard(Builder);
      Builder.SetInsertPoint(I);
      auto *Xor = Builder.CreateXor(LHS, RHS);
      return Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Xor);
    }

    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    Known = LHSKnown ^ RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // If all of the demanded bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
      Instruction *Or =
          BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
                                   I->getName());
      return InsertNewInstWith(Or, *I);
    }

    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | RHSKnown.One) &&
        RHSKnown.One.isSubsetOf(LHSKnown.One)) {
      Constant *AndC = Constant::getIntegerValue(VTy,
                                                 ~RHSKnown.One & DemandedMask);
      Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
      return InsertNewInstWith(And, *I);
    }

    // If the RHS is a constant, see if we can change it. Don't alter a -1
    // constant because that's a canonical 'not' op, and that is better for
    // combining, SCEV, and codegen.
    const APInt *C;
    if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnes()) {
      if ((*C | ~DemandedMask).isAllOnes()) {
        // Force bits to 1 to create a 'not' op.
        I->setOperand(1, ConstantInt::getAllOnesValue(VTy));
        return I;
      }
      // If we can't turn this into a 'not', try to shrink the constant.
      if (ShrinkDemandedConstant(I, 1, DemandedMask))
        return I;
    }

    // If our LHS is an 'and' and if it has one use, and if any of the bits we
    // are flipping are known to be set, then the xor is just resetting those
    // bits to zero. We can just knock out bits from the 'and' and the 'xor',
    // simplifying both of them.
    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) {
      ConstantInt *AndRHS, *XorRHS;
      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
          match(I->getOperand(1), m_ConstantInt(XorRHS)) &&
          match(LHSInst->getOperand(1), m_ConstantInt(AndRHS)) &&
          (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
        APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);

        Constant *AndC =
            ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
        Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
        InsertNewInstWith(NewAnd, *I);

        Constant *XorC =
            ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
        Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
        return InsertNewInstWith(NewXor, *I);
      }
    }
    break;
  }
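  // Illustrative example for the 'not' canonicalization above (hypothetical
  // IR): with %r = xor i8 %x, 0x7F and DemandedMask = 0x7F, the constant
  // already flips every demanded bit, so it is widened to -1 to form the
  // canonical 'xor %x, -1' (a 'not').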
  case Instruction::Select: {
    Value *LHS, *RHS;
    SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
    if (SPF == SPF_UMAX) {
      // UMax(A, C) == A if ...
      // The lowest non-zero bit of DemandedMask is higher than the highest
      // non-zero bit of C.
      const APInt *C;
      unsigned CTZ = DemandedMask.countTrailingZeros();
      if (match(RHS, m_APInt(C)) && CTZ >= C->getActiveBits())
        return LHS;
    } else if (SPF == SPF_UMIN) {
      // UMin(A, C) == A if ...
      // The lowest non-zero bit of DemandedMask is higher than the highest
      // non-one bit of C.
      // This comes from using DeMorgan's on the above umax example.
      const APInt *C;
      unsigned CTZ = DemandedMask.countTrailingZeros();
      if (match(RHS, m_APInt(C)) &&
          CTZ >= C->getBitWidth() - C->countLeadingOnes())
        return LHS;
    }

    // If this is a select as part of any other min/max pattern, don't simplify
    // any further in case we break the structure.
    if (SPF != SPF_UNKNOWN)
      return nullptr;

    if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    // This is similar to ShrinkDemandedConstant, but for a select we want to
    // try to keep the selected constants the same as icmp value constants, if
    // we can. This helps not break apart (or helps put back together)
    // canonical patterns like min and max.
    auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo,
                                         const APInt &DemandedMask) {
      const APInt *SelC;
      if (!match(I->getOperand(OpNo), m_APInt(SelC)))
        return false;

      // Get the constant out of the ICmp, if there is one.
      // Only try this when exactly 1 operand is a constant (if both operands
      // are constant, the icmp should eventually simplify). Otherwise, we may
      // invert the transform that reduces set bits and infinite-loop.
      Value *X;
      const APInt *CmpC;
      ICmpInst::Predicate Pred;
      if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) ||
          isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth())
        return ShrinkDemandedConstant(I, OpNo, DemandedMask);

      // If the constant is already the same as the ICmp, leave it as-is.
      if (*CmpC == *SelC)
        return false;
      // If the constants are not already the same, but can be with the demand
      // mask, use the constant value from the ICmp.
      if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) {
        I->setOperand(OpNo, ConstantInt::get(I->getType(), *CmpC));
        return true;
      }

      return ShrinkDemandedConstant(I, OpNo, DemandedMask);
    };
    if (CanonicalizeSelectConstant(I, 1, DemandedMask) ||
        CanonicalizeSelectConstant(I, 2, DemandedMask))
      return I;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(LHSKnown, RHSKnown);
    break;
  }
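  // Illustrative example for CanonicalizeSelectConstant (hypothetical IR):
  //   %c = icmp ugt i32 %x, 16
  //   %s = select i1 %c, i32 %x, i32 20
  // If bit 2 is not demanded, the constants 20 and 16 agree on every demanded
  // bit, so the select arm is rewritten to 16, re-forming a canonical umax.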
  case Instruction::Trunc: {
    // If we do not demand the high bits of a right-shifted and truncated value,
    // then we may be able to truncate it before the shift.
    Value *X;
    const APInt *C;
    if (match(I->getOperand(0), m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) {
      // The shift amount must be valid (not poison) in the narrow type, and
      // it must not be greater than the high bits demanded of the result.
      if (C->ult(I->getType()->getScalarSizeInBits()) &&
          C->ule(DemandedMask.countLeadingZeros())) {
        // trunc (lshr X, C) --> lshr (trunc X), C
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *Trunc = Builder.CreateTrunc(X, I->getType());
        return Builder.CreateLShr(Trunc, C->getZExtValue());
      }
    }
  }
    LLVM_FALLTHROUGH;
  case Instruction::ZExt: {
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
      return I;
    assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
    Known = InputKnown.zextOrTrunc(BitWidth);
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
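  // Illustrative example for the trunc-of-lshr rule (hypothetical IR):
  //   %s = lshr i32 %x, 8
  //   %t = trunc i32 %s to i16
  // If only the low 8 bits of %t are demanded, the shift amount (8) is valid
  // in i16 and does not exceed the undemanded high bits, so this becomes
  //   %t2 = trunc i32 %x to i16
  //   %r  = lshr i16 %t2, 8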
  case Instruction::BitCast:
    if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
      return nullptr; // vector->int or fp->int?

    if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
      if (VectorType *SrcVTy =
              dyn_cast<VectorType>(I->getOperand(0)->getType())) {
        if (cast<FixedVectorType>(DstVTy)->getNumElements() !=
            cast<FixedVectorType>(SrcVTy)->getNumElements())
          // Don't touch a bitcast between vectors of different element counts.
          return nullptr;
      } else
        // Don't touch a scalar-to-vector bitcast.
        return nullptr;
    } else if (I->getOperand(0)->getType()->isVectorTy())
      // Don't touch a vector-to-scalar bitcast.
      return nullptr;

    if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
      return I;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);

    // If any of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    if (DemandedMask.getActiveBits() > SrcBitWidth)
      InputDemandedBits.setBit(SrcBitWidth - 1);

    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1))
      return I;

    // If the input sign bit is known zero, or if none of the extended bits
    // are demanded, convert this into a zero extension.
    if (InputKnown.isNonNegative() ||
        DemandedMask.getActiveBits() <= SrcBitWidth) {
      // Convert to ZExt cast.
      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
      return InsertNewInstWith(NewCast, *I);
    }

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = InputKnown.sext(BitWidth);
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
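  // Illustrative example for the sext-to-zext rule (hypothetical IR): for
  //   %r = sext i8 %x to i32
  // with DemandedMask = 0x000000FF, no extended bit is demanded, so the cast
  // is rewritten to 'zext i8 %x to i32', which is easier for later folds.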
  case Instruction::Add:
    if ((DemandedMask & 1) == 0) {
      // If we do not need the low bit, try to convert bool math to logic:
      //   add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN
      Value *X, *Y;
      if (match(I, m_c_Add(m_OneUse(m_ZExt(m_Value(X))),
                           m_OneUse(m_SExt(m_Value(Y))))) &&
          X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
        // Truth table for inputs and output signbits:
        //       X:0 | X:1
        //      -----------
        // Y:0 |  0  |  0  |
        // Y:1 | -1  |  0  |
        //      -----------
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y);
        return Builder.CreateSExt(AndNot, VTy);
      }

      // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN
      // TODO: Relax the one-use checks because we are removing an instruction?
      if (match(I, m_Add(m_OneUse(m_SExt(m_Value(X))),
                         m_OneUse(m_SExt(m_Value(Y))))) &&
          X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
        // Truth table for inputs and output signbits:
        //       X:0 | X:1
        //      -----------
        // Y:0 | -1  | -1  |
        // Y:1 | -1  |  0  |
        //      -----------
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *Or = Builder.CreateOr(X, Y);
        return Builder.CreateSExt(Or, VTy);
      }
    }
    LLVM_FALLTHROUGH;
  case Instruction::Sub: {
    // If the high bits of an ADD/SUB are not demanded, then we do not care
    // about the high bits of the operands.
    unsigned NLZ = DemandedMask.countLeadingZeros();
    // Right fill the mask of bits for this ADD/SUB to demand the most
    // significant bit and all those below it.
    APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
      if (NLZ > 0) {
        // Disable the nsw and nuw flags here: We can no longer guarantee that
        // we won't wrap after simplification. Removing the nsw/nuw flags is
        // legal here because the top bit is not demanded.
        BinaryOperator &BinOP = *cast<BinaryOperator>(I);
        BinOP.setHasNoSignedWrap(false);
        BinOP.setHasNoUnsignedWrap(false);
      }
      return I;
    }

    // If we are known to be adding/subtracting zeros to every bit below
    // the highest demanded bit, we just return the other side.
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    // We can't do this with the LHS for subtraction, unless we are only
    // demanding the LSB.
    if ((I->getOpcode() == Instruction::Add || DemandedFromOps.isOne()) &&
        DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // Otherwise just compute the known bits of the result.
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    Known = KnownBits::computeForAddSub(I->getOpcode() == Instruction::Add,
                                        NSW, LHSKnown, RHSKnown);
    break;
  }
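  // Illustrative example (hypothetical IR): for %r = add nuw i32 %x, %y with
  // DemandedMask = 0x0000FFFF, only the low 16 bits of each operand are
  // demanded, so wide constants feeding the add can be shrunk; if that
  // happens, the nuw/nsw flags are dropped because wrapping in the ignored
  // high bits can no longer be ruled out.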
  case Instruction::Shl: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      const APInt *ShrAmt;
      if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt))))
        if (Instruction *Shr = dyn_cast<Instruction>(I->getOperand(0)))
          if (Value *R = simplifyShrShlDemandedBits(Shr, *ShrAmt, I, *SA,
                                                    DemandedMask, Known))
            return R;

      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);
      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));

      // If the shift is NUW/NSW, then it does demand the high bits.
      ShlOperator *IOp = cast<ShlOperator>(I);
      if (IOp->hasNoSignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt + 1);
      else if (IOp->hasNoUnsignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");

      bool SignBitZero = Known.Zero.isSignBitSet();
      bool SignBitOne = Known.One.isSignBitSet();
      Known.Zero <<= ShiftAmt;
      Known.One <<= ShiftAmt;
      // low bits known zero.
      if (ShiftAmt)
        Known.Zero.setLowBits(ShiftAmt);

      // If this shift has "nsw" keyword, then the result is either a poison
      // value or has the same sign bit as the first operand.
      if (IOp->hasNoSignedWrap()) {
        if (SignBitZero)
          Known.Zero.setSignBit();
        else if (SignBitOne)
          Known.One.setSignBit();
        if (Known.hasConflict())
          return UndefValue::get(I->getType());
      }
    } else {
      // This is a variable shift, so we can't shift the demand mask by a known
      // amount. But if we are not demanding high bits, then we are not
      // demanding those bits from the pre-shifted operand either.
      if (unsigned CTLZ = DemandedMask.countLeadingZeros()) {
        APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
        if (SimplifyDemandedBits(I, 0, DemandedFromOp, Known, Depth + 1)) {
          // We can't guarantee that nsw/nuw hold after simplifying the operand.
          I->dropPoisonGeneratingFlags();
          return I;
        }
      }
      computeKnownBits(I, Known, Depth, CxtI);
    }
    break;
  }
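  // Illustrative example for the constant-shift path (hypothetical IR): for
  //   %r = shl i32 %x, 8
  // with DemandedMask = 0x00FF0000, only DemandedMask >> 8 = 0x0000FF00 is
  // demanded from %x; any nsw/nuw flags extend the demand to the shifted-out
  // high bits, since those bits determine whether the shift wraps.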
  case Instruction::LShr: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);

      // Unsigned shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (cast<LShrOperator>(I)->isExact())
        DemandedMaskIn.setLowBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShiftAmt);
      Known.One.lshrInPlace(ShiftAmt);
      if (ShiftAmt)
        Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
    } else {
      computeKnownBits(I, Known, Depth, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedMask.isOne()) {
      // Perform the logical shift right.
      Instruction *NewVal = BinaryOperator::CreateLShr(
          I->getOperand(0), I->getOperand(1), I->getName());
      return InsertNewInstWith(NewVal, *I);
    }

    // If the sign bit is the only bit demanded by this ashr, then there is no
    // need to do it, the shift doesn't change the high bit.
    if (DemandedMask.isSignMask())
      return I->getOperand(0);

    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);

      // Signed shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
      // If any of the high bits are demanded, we should set the sign bit as
      // demanded.
      if (DemandedMask.countLeadingZeros() <= ShiftAmt)
        DemandedMaskIn.setSignBit();

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (cast<AShrOperator>(I)->isExact())
        DemandedMaskIn.setLowBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;

      unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);

      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      // Compute the new bits that are at the top now plus sign bits.
      APInt HighBits(APInt::getHighBitsSet(
          BitWidth, std::min(SignBits + ShiftAmt - 1, BitWidth)));
      Known.Zero.lshrInPlace(ShiftAmt);
      Known.One.lshrInPlace(ShiftAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      assert(BitWidth > ShiftAmt && "Shift amount not saturated?");
      if (Known.Zero[BitWidth - ShiftAmt - 1] ||
          !DemandedMask.intersects(HighBits)) {
        BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
                                                          I->getOperand(1));
        LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
        return InsertNewInstWith(LShr, *I);
      } else if (Known.One[BitWidth - ShiftAmt - 1]) { // New bits are known one.
        Known.One |= HighBits;
      }
    } else {
      computeKnownBits(I, Known, Depth, CxtI);
    }
    break;
  }
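  // Illustrative example for the ashr-to-lshr rule (hypothetical IR): for
  //   %r = ashr i32 %x, 24
  // with %x known non-negative (sign bit zero), the copied sign bits are all
  // zero anyway, so the shift is replaced by 'lshr i32 %x, 24'.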
  case Instruction::UDiv: {
    // UDiv doesn't demand low bits that are zero in the divisor.
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      // If the division is exact, then it does demand the low bits.
      if (cast<UDivOperator>(I)->isExact())
        break;

      // FIXME: Take the demanded mask of the result into account.
      unsigned RHSTrailingZeros = SA->countTrailingZeros();
      APInt DemandedMaskIn =
          APInt::getHighBitsSet(BitWidth, BitWidth - RHSTrailingZeros);
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, LHSKnown, Depth + 1))
        return I;

      // Propagate zero bits from the input.
      Known.Zero.setHighBits(std::min(
          BitWidth, LHSKnown.Zero.countLeadingOnes() + RHSTrailingZeros));
    } else {
      computeKnownBits(I, Known, Depth, CxtI);
    }
    break;
  }
  case Instruction::SRem: {
    ConstantInt *Rem;
    if (match(I->getOperand(1), m_ConstantInt(Rem))) {
      // X % -1 demands all the bits because we don't want to introduce
      // INT_MIN % -1 (== undef) by accident.
      if (Rem->isMinusOne())
        break;
      APInt RA = Rem->getValue().abs();
      if (RA.isPowerOf2()) {
        if (DemandedMask.ult(RA)) // srem won't affect demanded bits
          return I->getOperand(0);

        APInt LowBits = RA - 1;
        APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
        if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1))
          return I;

        // The low bits of LHS are unchanged by the srem.
        Known.Zero = LHSKnown.Zero & LowBits;
        Known.One = LHSKnown.One & LowBits;

        // If LHS is non-negative or has all low bits zero, then the upper bits
        // are all zero.
        if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero))
          Known.Zero |= ~LowBits;

        // If LHS is negative and not all low bits are zero, then the upper bits
        // are all one.
        if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One))
          Known.One |= ~LowBits;

        assert(!Known.hasConflict() && "Bits known to be one AND zero?");
        break;
      }
    }

    // The sign bit is the LHS's sign bit, except when the result of the
    // remainder is zero.
    if (DemandedMask.isSignBitSet()) {
      computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
      // If it's known zero, our sign bit is also zero.
      if (LHSKnown.isNonNegative())
        Known.makeNonNegative();
    }
    break;
  }
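  // Illustrative example (hypothetical IR): for %r = srem i32 %x, 8 with
  // DemandedMask = 7, the remainder by a power of two leaves the low three
  // bits equal to those of %x, so %x is returned in place of the srem.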
  case Instruction::URem: {
    KnownBits Known2(BitWidth);
    APInt AllOnes = APInt::getAllOnes(BitWidth);
    if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) ||
        SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
      return I;

    unsigned Leaders = Known2.countMinLeadingZeros();
    Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
    break;
  }
  case Instruction::Call: {
    bool KnownBitsComputed = false;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::abs: {
        if (DemandedMask == 1)
          return II->getArgOperand(0);
        break;
      }
      case Intrinsic::ctpop: {
        // Checking if the number of clear bits is odd (parity)? If the type has
        // an even number of bits, that's the same as checking if the number of
        // set bits is odd, so we can eliminate the 'not' op.
        Value *X;
        if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 &&
            match(II->getArgOperand(0), m_Not(m_Value(X)))) {
          Function *Ctpop = Intrinsic::getDeclaration(
              II->getModule(), Intrinsic::ctpop, II->getType());
          return InsertNewInstWith(CallInst::Create(Ctpop, {X}), *I);
        }
        break;
      }
      case Intrinsic::bswap: {
        // If the only bits demanded come from one byte of the bswap result,
        // just shift the input byte into position to eliminate the bswap.
        unsigned NLZ = DemandedMask.countLeadingZeros();
        unsigned NTZ = DemandedMask.countTrailingZeros();

        // Round NTZ down to the next byte. If we have 11 trailing zeros, then
        // we need all the bits down to bit 8. Likewise, round NLZ. If we
        // have 14 leading zeros, round to 8.
        NLZ = alignDown(NLZ, 8);
        NTZ = alignDown(NTZ, 8);
        // If we need exactly one byte, we can do this transformation.
        if (BitWidth - NLZ - NTZ == 8) {
          // Replace this with either a left or right shift to get the byte into
          // the right place.
          Instruction *NewVal;
          if (NLZ > NTZ)
            NewVal = BinaryOperator::CreateLShr(
                II->getArgOperand(0),
                ConstantInt::get(I->getType(), NLZ - NTZ));
          else
            NewVal = BinaryOperator::CreateShl(
                II->getArgOperand(0),
                ConstantInt::get(I->getType(), NTZ - NLZ));
          NewVal->takeName(I);
          return InsertNewInstWith(NewVal, *I);
        }
        break;
      }
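      // Illustrative example (hypothetical IR): for %r = bswap(i32 %x) with
      // DemandedMask = 0x000000FF, the demanded byte of the result is the
      // input's top byte, so the call is replaced by 'lshr i32 %x, 24'.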
      case Intrinsic::fshr:
      case Intrinsic::fshl: {
        const APInt *SA;
        if (!match(I->getOperand(2), m_APInt(SA)))
          break;

        // Normalize to funnel shift left. APInt shifts of BitWidth are well-
        // defined, so no need to special-case zero shifts here.
        uint64_t ShiftAmt = SA->urem(BitWidth);
        if (II->getIntrinsicID() == Intrinsic::fshr)
          ShiftAmt = BitWidth - ShiftAmt;

        APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
        APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
        if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
            SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
          return I;

        Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
                     RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
        Known.One = LHSKnown.One.shl(ShiftAmt) |
                    RHSKnown.One.lshr(BitWidth - ShiftAmt);
        KnownBitsComputed = true;
        break;
      }
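      // Illustrative example (hypothetical IR): fshl(i32 %a, i32 %b, 8)
      // computes (%a << 8) | (%b >> 24), so a demanded result bit k pulls on
      // bit (k - 8) of %a or bit (k + 24) of %b; the two masks above split
      // the demand accordingly.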
      case Intrinsic::umax: {
        // UMax(A, C) == A if ...
        // The lowest non-zero bit of DemandedMask is higher than the highest
        // non-zero bit of C.
        const APInt *C;
        unsigned CTZ = DemandedMask.countTrailingZeros();
        if (match(II->getArgOperand(1), m_APInt(C)) &&
            CTZ >= C->getActiveBits())
          return II->getArgOperand(0);
        break;
      }
      case Intrinsic::umin: {
        // UMin(A, C) == A if ...
        // The lowest non-zero bit of DemandedMask is higher than the highest
        // non-one bit of C.
        // This comes from using DeMorgan's on the above umax example.
        const APInt *C;
        unsigned CTZ = DemandedMask.countTrailingZeros();
        if (match(II->getArgOperand(1), m_APInt(C)) &&
            CTZ >= C->getBitWidth() - C->countLeadingOnes())
          return II->getArgOperand(0);
        break;
      }
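      // Illustrative example (hypothetical IR): for
      //   %r = call i32 @llvm.umax.i32(i32 %x, i32 7)
      // with DemandedMask = 0xFFFFFFF8, clamping to at least 7 can only
      // change bits below bit 3, none of which are demanded, so %x is
      // returned unchanged.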
      default: {
        // Handle target specific intrinsics
        Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
            *II, DemandedMask, Known, KnownBitsComputed);
        if (V.hasValue())
          return V.getValue();
        break;
      }
      }
    }

    if (!KnownBitsComputed)
      computeKnownBits(V, Known, Depth, CxtI);
    break;
  }
  }

  // If the client is only demanding bits that we know, return the known
  // constant.
  if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
    return Constant::getIntegerValue(VTy, Known.One);

  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It computes Known
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
    Instruction *I, const APInt &DemandedMask, KnownBits &Known, unsigned Depth,
    Instruction *CxtI) {
  unsigned BitWidth = DemandedMask.getBitWidth();
  Type *ITy = I->getType();

  KnownBits LHSKnown(BitWidth);
  KnownBits RHSKnown(BitWidth);

  // Despite the fact that we can't simplify this instruction in every user's
  // context, we can at least compute the known bits, and we can
  // do simplifications that apply to *just* the one user if we know that
  // this instruction has a simpler value in that context.
  switch (I->getOpcode()) {
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
    Known = LHSKnown & RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    break;
  }
  case Instruction::Or: {
    // We can simplify (X|Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If either the LHS or the RHS are One, the result is One.
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
    Known = LHSKnown | RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::Xor: {
    // We can simplify (X^Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
    Known = LHSKnown ^ RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::AShr: {
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If operand 0 of the right shift is the result of a left shift by the
    // same amount, this is probably a zero/sign extension, which may be
    // unnecessary if we do not demand any of the new sign bits. In that case,
    // return the original operand instead.
    const APInt *ShiftRC;
    const APInt *ShiftLC;
    Value *X;
    unsigned BitWidth = DemandedMask.getBitWidth();
    if (match(I,
              m_AShr(m_Shl(m_Value(X), m_APInt(ShiftLC)), m_APInt(ShiftRC))) &&
        ShiftLC == ShiftRC && ShiftLC->ult(BitWidth) &&
        DemandedMask.isSubsetOf(APInt::getLowBitsSet(
            BitWidth, BitWidth - ShiftRC->getZExtValue()))) {
      return X;
    }
    break;
  }
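  // Illustrative example (hypothetical IR): the pair
  //   %s = shl i32 %x, 24
  //   %r = ashr i32 %s, 24
  // sign-extends the low byte of %x. If only the low 8 bits of %r are
  // demanded, the extension is irrelevant and %x itself can be used.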
  default:
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    break;
  }

  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
/// of "C2 - C1".
///
/// Suppose E1 and E2 are generally different in bits S = {bm, bm+1, ..., bn},
/// without considering the specific value X is holding.
/// This transformation is legal iff one of the following conditions holds:
///  1) All the bits in S are 0, in which case E1 == E2.
///  2) We don't care about those bits in S, per the input DemandedMask.
///  3) Combination of 1) and 2): some bits in S are 0, and we don't care
///     about the rest.
///
/// Currently we only test condition 2).
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
Value *InstCombinerImpl::simplifyShrShlDemandedBits(
    Instruction *Shr, const APInt &ShrOp1, Instruction *Shl,
    const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) {
  if (!ShlOp1 || !ShrOp1)
    return nullptr; // No-op.

  Value *VarX = Shr->getOperand(0);
  Type *Ty = VarX->getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    return nullptr; // Undef.

  unsigned ShlAmt = ShlOp1.getZExtValue();
  unsigned ShrAmt = ShrOp1.getZExtValue();

  Known.One.clearAllBits();
  Known.Zero.setLowBits(ShlAmt - 1);
  Known.Zero &= DemandedMask;

  APInt BitMask1(APInt::getAllOnes(BitWidth));
  APInt BitMask2(APInt::getAllOnes(BitWidth));

  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
                      (BitMask1.ashr(ShrAmt) << ShlAmt);

  if (ShrAmt <= ShlAmt) {
    BitMask2 <<= (ShlAmt - ShrAmt);
  } else {
    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt) :
                        BitMask2.ashr(ShrAmt - ShlAmt);
  }

  // Check if condition-2 (see the comment to this function) is satisfied.
  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    if (ShrAmt == ShlAmt)
      return VarX;

    if (!Shr->hasOneUse())
      return nullptr;

    BinaryOperator *New;
    if (ShrAmt < ShlAmt) {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
      New = BinaryOperator::CreateShl(VarX, Amt);
      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    } else {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
                     BinaryOperator::CreateAShr(VarX, Amt);
      if (cast<BinaryOperator>(Shr)->isExact())
        New->setIsExact(true);
    }

    return InsertNewInstWith(New, *Shl);
  }

  return nullptr;
}
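
// Illustrative example (hypothetical IR): for
//   %s = lshr i32 %x, 4
//   %r = shl i32 %s, 4
// E1 = (%x >> 4) << 4 differs from %x only in the low 4 bits. With
// DemandedMask = 0xFFFFFFF0 those bits don't matter (condition 2), and since
// ShrAmt == ShlAmt the routine returns %x directly.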

/// The specified value produces a vector with any number of elements.
/// This method analyzes which elements of the operand are undef or poison and
/// returns that information in UndefElts.
///
/// DemandedElts contains the set of elements that are actually used by the
/// caller, and by default (AllowMultipleUsers equals false) the value is
/// simplified only if it has a single caller. If AllowMultipleUsers is set
/// to true, DemandedElts refers to the union of sets of elements that are
/// used by all callers.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified and the resulting value is
/// returned. This returns null if no change was made.
Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
                                                    APInt DemandedElts,
                                                    APInt &UndefElts,
                                                    unsigned Depth,
                                                    bool AllowMultipleUsers) {
  // Cannot analyze scalable type. The number of vector elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(V->getType()))
    return nullptr;

  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
  APInt EltMask(APInt::getAllOnes(VWidth));
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");

  if (match(V, m_Undef())) {
    // If the entire vector is undef or poison, just return this info.
    UndefElts = EltMask;
    return nullptr;
  }

  if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
    UndefElts = EltMask;
    return PoisonValue::get(V->getType());
  }

  UndefElts = 0;

  if (auto *C = dyn_cast<Constant>(V)) {
    // Check if this is identity. If so, return nullptr since we are not
    // simplifying anything.
    if (DemandedElts.isAllOnes())
      return nullptr;

    Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    Constant *Poison = PoisonValue::get(EltTy);
    SmallVector<Constant *, 16> Elts;
    for (unsigned i = 0; i != VWidth; ++i) {
      if (!DemandedElts[i]) { // If not demanded, set to poison.
        Elts.push_back(Poison);
        UndefElts.setBit(i);
        continue;
      }

      Constant *Elt = C->getAggregateElement(i);
      if (!Elt) return nullptr;

      Elts.push_back(Elt);
      if (isa<UndefValue>(Elt)) // Already undef or poison.
        UndefElts.setBit(i);
    }

    // If we changed the constant, return it.
    Constant *NewCV = ConstantVector::get(Elts);
    return NewCV != C ? NewCV : nullptr;
  }
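  // Illustrative example (hypothetical values): for the constant
  //   <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  // with DemandedElts = 0b0101, lanes 1 and 3 are rewritten to poison,
  // yielding <i32 1, i32 poison, i32 3, i32 poison>, and UndefElts records
  // those lanes.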

  // Limit search depth.
  if (Depth == 10)
    return nullptr;

  if (!AllowMultipleUsers) {
    // If multiple users are using the root value, proceed with
    // simplification conservatively assuming that all elements
    // are needed.
    if (!V->hasOneUse()) {
      // Quit if we find multiple users of a non-root value though.
      // They'll be handled when it's their turn to be visited by
      // the main instcombine process.
      if (Depth != 0)
        // TODO: Just compute the UndefElts information recursively.
        return nullptr;

      // Conservatively assume that all elements are needed.
      DemandedElts = EltMask;
    }
  }

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return nullptr; // Only analyze instructions.

  bool MadeChange = false;
  auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
                              APInt Demanded, APInt &Undef) {
    auto *II = dyn_cast<IntrinsicInst>(Inst);
    Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
    if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
      replaceOperand(*Inst, OpNum, V);
      MadeChange = true;
    }
  };

  APInt UndefElts2(VWidth, 0);
  APInt UndefElts3(VWidth, 0);
  switch (I->getOpcode()) {
  default: break;

  case Instruction::GetElementPtr: {
    // The LangRef requires that struct geps have all constant indices. As
    // such, we can't convert any operand to partial undef.
    auto mayIndexStructType = [](GetElementPtrInst &GEP) {
      for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
           I != E; I++)
        if (I.isStruct())
          return true;
      return false;
    };
    if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
      break;

    // Conservatively track the demanded elements back through any vector
    // operands we may have. We know there must be at least one, or we
    // wouldn't have a vector result to get here. Note that we intentionally
    // merge the undef bits here since gepping with either a poison base or
    // index results in poison.
    for (unsigned i = 0; i < I->getNumOperands(); i++) {
      if (i == 0 ? match(I->getOperand(i), m_Undef())
                 : match(I->getOperand(i), m_Poison())) {
        // If the entire vector is undefined, just return this info.
        UndefElts = EltMask;
        return nullptr;
      }
      if (I->getOperand(i)->getType()->isVectorTy()) {
        APInt UndefEltsOp(VWidth, 0);
        simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
        // gep(x, undef) is not undef, so skip considering idx ops here.
        // Note that we could propagate poison, but we can't distinguish
        // between undef and poison bits ATM.
        if (i == 0)
          UndefElts |= UndefEltsOp;
      }
    }
    break;
  }
  1107. case Instruction::InsertElement: {
  1108. // If this is a variable index, we don't know which element it overwrites.
  1109. // demand exactly the same input as we produce.
    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
    if (!Idx) {
      // Note that we can't propagate undef elt info, because we don't know
      // which elt is getting updated.
      simplifyAndSetOp(I, 0, DemandedElts, UndefElts2);
      break;
    }

    // The element inserted overwrites whatever was there, so the input demanded
    // set is simpler than the output set.
    unsigned IdxNo = Idx->getZExtValue();
    APInt PreInsertDemandedElts = DemandedElts;
    if (IdxNo < VWidth)
      PreInsertDemandedElts.clearBit(IdxNo);

    // If we only demand the element that is being inserted and that element
    // was extracted from the same index in another vector with the same type,
    // replace this insert with that other vector.
    // Note: This is attempted before the call to simplifyAndSetOp because that
    // may change UndefElts to a value that does not match with Vec.
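    // For example, if only lane 1 is demanded in:
    //   %elt = extractelement <4 x i32> %vec, i32 1
    //   %ins = insertelement <4 x i32> %x, i32 %elt, i32 1
    // then %ins can be replaced by %vec outright.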
    Value *Vec;
    if (PreInsertDemandedElts == 0 &&
        match(I->getOperand(1),
              m_ExtractElt(m_Value(Vec), m_SpecificInt(IdxNo))) &&
        Vec->getType() == I->getType()) {
      return Vec;
    }

    simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts);

    // If this is inserting an element that isn't demanded, remove this
    // insertelement.
    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
      Worklist.push(I);
      return I->getOperand(0);
    }

    // The inserted element is defined.
    UndefElts.clearBit(IdxNo);
    break;
  }
  case Instruction::ShuffleVector: {
    auto *Shuffle = cast<ShuffleVectorInst>(I);
    assert(Shuffle->getOperand(0)->getType() ==
               Shuffle->getOperand(1)->getType() &&
           "Expected shuffle operands to have same type");
    unsigned OpWidth = cast<FixedVectorType>(Shuffle->getOperand(0)->getType())
                           ->getNumElements();

    // Handle trivial case of a splat. Only check the first element of LHS
    // operand.
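    // For example, a mask of <0, 0, 0, 0> reads only element 0 of the LHS, so
    // a single demanded lane of operand 0 feeds every result lane.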
    if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) &&
        DemandedElts.isAllOnes()) {
      if (!match(I->getOperand(1), m_Undef())) {
        I->setOperand(1, PoisonValue::get(I->getOperand(1)->getType()));
        MadeChange = true;
      }
      APInt LeftDemanded(OpWidth, 1);
      APInt LHSUndefElts(OpWidth, 0);
      simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
      if (LHSUndefElts[0])
        UndefElts = EltMask;
      else
        UndefElts.clearAllBits();
      break;
    }

    APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0);
    for (unsigned i = 0; i < VWidth; i++) {
      if (DemandedElts[i]) {
        unsigned MaskVal = Shuffle->getMaskValue(i);
        if (MaskVal != -1u) {
          assert(MaskVal < OpWidth * 2 &&
                 "shufflevector mask index out of range!");
          if (MaskVal < OpWidth)
            LeftDemanded.setBit(MaskVal);
          else
            RightDemanded.setBit(MaskVal - OpWidth);
        }
      }
    }

    APInt LHSUndefElts(OpWidth, 0);
    simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);

    APInt RHSUndefElts(OpWidth, 0);
    simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);

    // If this shuffle does not change the vector length and the elements
    // demanded by this shuffle are an identity mask, then this shuffle is
    // unnecessary.
    //
    // We are assuming canonical form for the mask, so the source vector is
    // operand 0 and operand 1 is not used.
    //
    // Note that if an element is demanded and this shuffle mask is undefined
    // for that element, then the shuffle is not considered an identity
    // operation. The shuffle prevents poison from the operand vector from
    // leaking to the result by replacing poison with an undefined value.
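    // For example, with demanded lanes {0, 2}, the shuffle
    //   shufflevector <4 x i32> %x, <4 x i32> poison,
    //                 <4 x i32> <i32 0, i32 undef, i32 2, i32 undef>
    // acts as the identity on %x for every demanded lane.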
    if (VWidth == OpWidth) {
      bool IsIdentityShuffle = true;
      for (unsigned i = 0; i < VWidth; i++) {
        unsigned MaskVal = Shuffle->getMaskValue(i);
        if (DemandedElts[i] && i != MaskVal) {
          IsIdentityShuffle = false;
          break;
        }
      }
      if (IsIdentityShuffle)
        return Shuffle->getOperand(0);
    }

    bool NewUndefElts = false;
    unsigned LHSIdx = -1u, LHSValIdx = -1u;
    unsigned RHSIdx = -1u, RHSValIdx = -1u;
    bool LHSUniform = true;
    bool RHSUniform = true;
    for (unsigned i = 0; i < VWidth; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u) {
        UndefElts.setBit(i);
      } else if (!DemandedElts[i]) {
        NewUndefElts = true;
        UndefElts.setBit(i);
      } else if (MaskVal < OpWidth) {
        if (LHSUndefElts[MaskVal]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          LHSIdx = LHSIdx == -1u ? i : OpWidth;
          LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
          LHSUniform = LHSUniform && (MaskVal == i);
        }
      } else {
        if (RHSUndefElts[MaskVal - OpWidth]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          RHSIdx = RHSIdx == -1u ? i : OpWidth;
          RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
          RHSUniform = RHSUniform && (MaskVal - OpWidth == i);
        }
      }
    }

    // Try to transform shuffle with constant vector and single element from
    // this constant vector to single insertelement instruction.
    // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
    // insertelement V, C[ci], ci-n
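    // For example (with all lanes demanded):
    //   shufflevector <4 x i32> %x, <4 x i32> <i32 9, i32 9, i32 9, i32 9>,
    //                 <4 x i32> <i32 0, i32 1, i32 6, i32 3>
    // takes only element 2 of the constant RHS and becomes
    //   insertelement <4 x i32> %x, i32 9, i32 2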
    if (OpWidth ==
        cast<FixedVectorType>(Shuffle->getType())->getNumElements()) {
      Value *Op = nullptr;
      Constant *Value = nullptr;
      unsigned Idx = -1u;

      // Find constant vector with the single element in shuffle (LHS or RHS).
      if (LHSIdx < OpWidth && RHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
          Op = Shuffle->getOperand(1);
          Value = CV->getOperand(LHSValIdx);
          Idx = LHSIdx;
        }
      }
      if (RHSIdx < OpWidth && LHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
          Op = Shuffle->getOperand(0);
          Value = CV->getOperand(RHSValIdx);
          Idx = RHSIdx;
        }
      }
      // Found constant vector with single element - convert to insertelement.
      if (Op && Value) {
        Instruction *New = InsertElementInst::Create(
            Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
            Shuffle->getName());
        InsertNewInstWith(New, *Shuffle);
        return New;
      }
    }
    if (NewUndefElts) {
      // Add additional discovered undefs.
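      // A lane that is not demanded, or that reads an undef lane of an
      // operand, can have its mask element rewritten to undef.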
      SmallVector<int, 16> Elts;
      for (unsigned i = 0; i < VWidth; ++i) {
        if (UndefElts[i])
          Elts.push_back(UndefMaskElem);
        else
          Elts.push_back(Shuffle->getMaskValue(i));
      }
      Shuffle->setShuffleMask(Elts);
      MadeChange = true;
    }
    break;
  }
  case Instruction::Select: {
    // If this is a vector select, try to transform the select condition based
    // on the current demanded elements.
    SelectInst *Sel = cast<SelectInst>(I);
    if (Sel->getCondition()->getType()->isVectorTy()) {
      // TODO: We are not doing anything with UndefElts based on this call.
      // It is overwritten below based on the other select operands. If an
      // element of the select condition is known undef, then we are free to
      // choose the output value from either arm of the select. If we know that
      // one of those values is undef, then the output can be undef.
      simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    }

    // Next, see if we can transform the arms of the select.
    APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
    if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
      for (unsigned i = 0; i < VWidth; i++) {
        // isNullValue() always returns false when called on a ConstantExpr.
        // Skip constant expressions to avoid propagating incorrect information.
        Constant *CElt = CV->getAggregateElement(i);
        if (isa<ConstantExpr>(CElt))
          continue;
        // TODO: If a select condition element is undef, we can demand from
        // either side. If one side is known undef, choosing that side would
        // propagate undef.
        if (CElt->isNullValue())
          DemandedLHS.clearBit(i);
        else
          DemandedRHS.clearBit(i);
      }
    }
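    // For example, a condition of <i1 true, i1 false> demands only lane 0
    // of the true arm and only lane 1 of the false arm.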
    simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2);
    simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3);

    // Output elements are undefined if the element from each arm is undefined.
    // TODO: This can be improved. See comment in select condition handling.
    UndefElts = UndefElts2 & UndefElts3;
    break;
  }
  case Instruction::BitCast: {
    // Vector->vector casts only.
    VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
    if (!VTy) break;
    unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements();
    APInt InputDemandedElts(InVWidth, 0);
    UndefElts2 = APInt(InVWidth, 0);
    unsigned Ratio;
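    // Map the demanded lanes across the cast. For example, bitcasting
    // <2 x i64> to <4 x i32> gives Ratio = 2 and output lanes {0, 1} both
    // alias input lane 0; bitcasting <4 x i32> to <2 x i64> also gives
    // Ratio = 2, but there input lanes {0, 1} both feed output lane 0.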
    if (VWidth == InVWidth) {
      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
      // elements as are demanded of us.
      Ratio = 1;
      InputDemandedElts = DemandedElts;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an input element is live if any of the
      // corresponding output elements are live.
      Ratio = VWidth / InVWidth;
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (DemandedElts[OutIdx])
          InputDemandedElts.setBit(OutIdx / Ratio);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an input element is live if the
      // corresponding output element is live.
      Ratio = InVWidth / VWidth;
      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
        if (DemandedElts[InIdx / Ratio])
          InputDemandedElts.setBit(InIdx);
    } else {
      // Unsupported so far.
      break;
    }

    simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2);

    if (VWidth == InVWidth) {
      UndefElts = UndefElts2;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an output element is undef if the
      // corresponding input element is undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (UndefElts2[OutIdx / Ratio])
          UndefElts.setBit(OutIdx);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an output element is undef if all of the
      // corresponding input elements are undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
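        // SubUndef isolates the Ratio input-undef bits feeding output lane
        // OutIdx; the lane is undef only if all of them are set.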
        APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
        if (SubUndef.countPopulation() == Ratio)
          UndefElts.setBit(OutIdx);
      }
    } else {
      llvm_unreachable("Unimp");
    }
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    break;
  case Instruction::Call: {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
    if (!II) break;
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_gather: // fallthrough
    case Intrinsic::masked_load: {
      // Subtlety: If we load from a pointer, the pointer must be valid
      // regardless of whether the element is demanded. Doing otherwise risks
      // segfaults which didn't exist in the original program.
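      // Where a mask element is known 0, the lane comes from the passthrough,
      // so its pointer is not demanded; where it is known 1, the lane comes
      // from memory, so its passthrough lane is not demanded.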
      APInt DemandedPtrs(APInt::getAllOnes(VWidth)),
          DemandedPassThrough(DemandedElts);

      if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
        for (unsigned i = 0; i < VWidth; i++) {
          Constant *CElt = CV->getAggregateElement(i);
          if (CElt->isNullValue())
            DemandedPtrs.clearBit(i);
          else if (CElt->isAllOnesValue())
            DemandedPassThrough.clearBit(i);
        }
      if (II->getIntrinsicID() == Intrinsic::masked_gather)
        simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2);
      simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3);
      // Output elements are undefined if the elements from both sources are.
      // TODO: can strengthen via mask as well.
      UndefElts = UndefElts2 & UndefElts3;
      break;
    }
    default: {
      // Handle target specific intrinsics.
      Optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
          *II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
          simplifyAndSetOp);
      if (V.hasValue())
        return V.getValue();
      break;
    }
    } // switch on IntrinsicID
    break;
  } // case Call
  } // switch on Opcode
  // TODO: We bail completely on integer div/rem and shifts because they have
  // UB/poison potential, but that should be refined.
  BinaryOperator *BO;
  if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
    simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);

    // Output elements are undefined if both are undefined. Consider things
    // like undef & 0. The result is known zero, not undef.
    UndefElts &= UndefElts2;
  }
  // If we've proven all of the lanes undef, return an undef value.
  // TODO: Intersect w/demanded lanes
  if (UndefElts.isAllOnes())
    return UndefValue::get(I->getType());

  return MadeChange ? I : nullptr;
}