InstCombineSimplifyDemanded.cpp

//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains logic for simplifying instructions based on information
// about how they are used.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
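/// e.g., for 'or i32 %x, 0x0FF0' with Demanded == 0x00FF, the constant is
/// shrunk to 0x00F0 because the high-nibble bits are never observed.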
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
                                   const APInt &Demanded) {
  assert(I && "No instruction?");
  assert(OpNo < I->getNumOperands() && "Operand index too large");

  // The operand must be a constant integer or splat integer.
  Value *Op = I->getOperand(OpNo);
  const APInt *C;
  if (!match(Op, m_APInt(C)))
    return false;

  // If there are no bits set that aren't demanded, nothing to do.
  if (C->isSubsetOf(Demanded))
    return false;

  // This instruction is producing bits that are not demanded. Shrink the RHS.
  I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));

  return true;
}

/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
/// the instruction has any properties that allow us to simplify its operands.
bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
  KnownBits Known(BitWidth);
  APInt DemandedMask(APInt::getAllOnesValue(BitWidth));

  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
                                     0, &Inst);
  if (!V) return false;
  if (V == &Inst) return true;
  replaceInstUsesWith(Inst, V);
  return true;
}

/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
                                            const APInt &DemandedMask,
                                            KnownBits &Known, unsigned Depth) {
  Use &U = I->getOperandUse(OpNo);
  Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known,
                                          Depth, I);
  if (!NewVal) return false;
  if (Instruction* OpInst = dyn_cast<Instruction>(U))
    salvageDebugInfo(*OpInst);

  replaceUse(U, NewVal);
  return true;
}

/// This function attempts to replace V with a simpler value based on the
/// demanded bits. When this function is called, it is known that only the bits
/// set in DemandedMask of the result of V are ever used downstream.
/// Consequently, depending on the mask and V, it may be possible to replace V
/// with a constant or one of its operands. In such cases, this function
/// returns the simplified replacement value. In all other cases, it returns
/// null after analyzing the expression, setting Known.One to the bits known
/// to be one and Known.Zero to the bits known to be zero in the expression.
/// These are provided to potentially allow the caller (which might
/// recursively be SimplifyDemandedBits itself) to simplify the expression.
///
/// Known.One and Known.Zero always follow the invariant that:
///   Known.One & Known.Zero == 0.
/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and
/// Known.Zero may only be accurate for those bits set in DemandedMask. Note
/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all
/// be the same.
///
/// This returns null if it did not change anything and it permits no
/// simplification. This returns V itself if it did some simplification of V's
/// operands based on the information about what bits are demanded. This returns
/// some other non-null value if it found out that V is equal to another value
/// in the context where the specified bits are demanded, but not for all users.
Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
                                                 KnownBits &Known,
                                                 unsigned Depth,
                                                 Instruction *CxtI) {
  assert(V != nullptr && "Null pointer of Value???");
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
  uint32_t BitWidth = DemandedMask.getBitWidth();
  Type *VTy = V->getType();
  assert(
      (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
      Known.getBitWidth() == BitWidth &&
      "Value *V, DemandedMask and Known must have same BitWidth");

  if (isa<Constant>(V)) {
    computeKnownBits(V, Known, Depth, CxtI);
    return nullptr;
  }

  Known.resetAll();
  if (DemandedMask.isNullValue()) // Not demanding any bits from V.
    return UndefValue::get(VTy);

  if (Depth == MaxAnalysisRecursionDepth)
    return nullptr;

  if (isa<ScalableVectorType>(VTy))
    return nullptr;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) {
    computeKnownBits(V, Known, Depth, CxtI);
    return nullptr; // Only analyze instructions.
  }

  // If there are multiple uses of this value and we aren't at the root, then
  // we can't do any simplifications of the operands, because DemandedMask
  // only reflects the bits demanded by *one* of the users.
  if (Depth != 0 && !I->hasOneUse())
    return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI);

  KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);

  // If this is the root being simplified, allow it to have multiple uses,
  // just set the DemandedMask to all bits so that we can try to simplify the
  // operands. This allows visitTruncInst (for example) to simplify the
  // operand of a trunc without duplicating all the logic below.
  if (Depth == 0 && !V->hasOneUse())
    DemandedMask.setAllBits();

  switch (I->getOpcode()) {
  default:
    computeKnownBits(I, Known, Depth, CxtI);
    break;
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown,
                             Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    Known = LHSKnown & RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
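    // e.g., for 'and i32 %x, 255' with only the low 8 bits demanded, the mask
    // is a no-op on those bits, so we return %x.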
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
      return I;

    break;
  }
  case Instruction::Or: {
    // If either the LHS or the RHS are One, the result is One.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
                             Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    Known = LHSKnown | RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask))
      return I;

    break;
  }
  case Instruction::Xor: {
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    Known = LHSKnown ^ RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // If all of the demanded bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
      Instruction *Or =
          BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
                                   I->getName());
      return InsertNewInstWith(Or, *I);
    }

    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if (DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) &&
        RHSKnown.One.isSubsetOf(LHSKnown.One)) {
      Constant *AndC = Constant::getIntegerValue(VTy,
                                                 ~RHSKnown.One & DemandedMask);
      Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
      return InsertNewInstWith(And, *I);
    }

    // If the RHS is a constant, see if we can change it. Don't alter a -1
    // constant because that's a canonical 'not' op, and that is better for
    // combining, SCEV, and codegen.
    const APInt *C;
    if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnesValue()) {
      if ((*C | ~DemandedMask).isAllOnesValue()) {
        // Force bits to 1 to create a 'not' op.
        I->setOperand(1, ConstantInt::getAllOnesValue(VTy));
        return I;
      }
      // If we can't turn this into a 'not', try to shrink the constant.
      if (ShrinkDemandedConstant(I, 1, DemandedMask))
        return I;
    }

    // If our LHS is an 'and' and if it has one use, and if any of the bits we
    // are flipping are known to be set, then the xor is just resetting those
    // bits to zero. We can just knock out bits from the 'and' and the 'xor',
    // simplifying both of them.
    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) {
      ConstantInt *AndRHS, *XorRHS;
      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
          match(I->getOperand(1), m_ConstantInt(XorRHS)) &&
          match(LHSInst->getOperand(1), m_ConstantInt(AndRHS)) &&
          (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
        APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);

        Constant *AndC =
            ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
        Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
        InsertNewInstWith(NewAnd, *I);

        Constant *XorC =
            ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
        Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
        return InsertNewInstWith(NewXor, *I);
      }
    }
    break;
  }
  case Instruction::Select: {
    Value *LHS, *RHS;
    SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
    if (SPF == SPF_UMAX) {
      // UMax(A, C) == A if ...
      // The lowest non-zero bit of DemandedMask is higher than the highest
      // non-zero bit of C.
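      // e.g., umax(A, 7) == A on every bit above bit 2: when A < 7, both A
      // and 7 are zero in those high bits anyway.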
      const APInt *C;
      unsigned CTZ = DemandedMask.countTrailingZeros();
      if (match(RHS, m_APInt(C)) && CTZ >= C->getActiveBits())
        return LHS;
    } else if (SPF == SPF_UMIN) {
      // UMin(A, C) == A if ...
      // The lowest non-zero bit of DemandedMask is higher than the highest
      // non-one bit of C.
      // This comes from using DeMorgans on the above umax example.
      const APInt *C;
      unsigned CTZ = DemandedMask.countTrailingZeros();
      if (match(RHS, m_APInt(C)) &&
          CTZ >= C->getBitWidth() - C->countLeadingOnes())
        return LHS;
    }

    // If this is a select as part of any other min/max pattern, don't simplify
    // any further in case we break the structure.
    if (SPF != SPF_UNKNOWN)
      return nullptr;

    if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
        SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1))
      return I;
    assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
    assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    // This is similar to ShrinkDemandedConstant, but for a select we want to
    // try to keep the selected constants the same as icmp value constants, if
    // we can. This helps not break apart (or helps put back together)
    // canonical patterns like min and max.
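    // e.g., in 'select (icmp slt X, 5), 5, X' (an smax idiom), prefer keeping
    // the select constant equal to the icmp constant 5 over shrinking it, so
    // the max pattern stays recognizable.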
    auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo,
                                         const APInt &DemandedMask) {
      const APInt *SelC;
      if (!match(I->getOperand(OpNo), m_APInt(SelC)))
        return false;

      // Get the constant out of the ICmp, if there is one.
      // Only try this when exactly 1 operand is a constant (if both operands
      // are constant, the icmp should eventually simplify). Otherwise, we may
      // invert the transform that reduces set bits and infinite-loop.
      Value *X;
      const APInt *CmpC;
      ICmpInst::Predicate Pred;
      if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) ||
          isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth())
        return ShrinkDemandedConstant(I, OpNo, DemandedMask);

      // If the constant is already the same as the ICmp, leave it as-is.
      if (*CmpC == *SelC)
        return false;
      // If the constants are not already the same, but can be with the demand
      // mask, use the constant value from the ICmp.
      if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) {
        I->setOperand(OpNo, ConstantInt::get(I->getType(), *CmpC));
        return true;
      }

      return ShrinkDemandedConstant(I, OpNo, DemandedMask);
    };
    if (CanonicalizeSelectConstant(I, 1, DemandedMask) ||
        CanonicalizeSelectConstant(I, 2, DemandedMask))
      return I;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(LHSKnown, RHSKnown);
    break;
  }
  case Instruction::ZExt:
  case Instruction::Trunc: {
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
      return I;
    assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
    Known = InputKnown.zextOrTrunc(BitWidth);
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case Instruction::BitCast:
    if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
      return nullptr; // vector->int or fp->int?

    if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
      if (VectorType *SrcVTy =
              dyn_cast<VectorType>(I->getOperand(0)->getType())) {
        if (cast<FixedVectorType>(DstVTy)->getNumElements() !=
            cast<FixedVectorType>(SrcVTy)->getNumElements())
          // Don't touch a bitcast between vectors of different element counts.
          return nullptr;
      } else
        // Don't touch a scalar-to-vector bitcast.
        return nullptr;
    } else if (I->getOperand(0)->getType()->isVectorTy())
      // Don't touch a vector-to-scalar bitcast.
      return nullptr;

    if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
      return I;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);

    // If any of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
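    // e.g., if bit 12 of 'sext i8 %x to i32' is demanded, then bit 7 of %x
    // (its sign bit) is demanded, because it produces that result bit.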
    if (DemandedMask.getActiveBits() > SrcBitWidth)
      InputDemandedBits.setBit(SrcBitWidth-1);

    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1))
      return I;

    // If the input sign bit is known zero, or if the NewBits are not demanded
    // convert this into a zero extension.
    if (InputKnown.isNonNegative() ||
        DemandedMask.getActiveBits() <= SrcBitWidth) {
      // Convert to ZExt cast.
      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
      return InsertNewInstWith(NewCast, *I);
    }

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = InputKnown.sext(BitWidth);
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case Instruction::Add:
    if ((DemandedMask & 1) == 0) {
      // If we do not need the low bit, try to convert bool math to logic:
      //   add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN
      Value *X, *Y;
      if (match(I, m_c_Add(m_OneUse(m_ZExt(m_Value(X))),
                           m_OneUse(m_SExt(m_Value(Y))))) &&
          X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
        // Truth table for inputs and output signbits:
        //       X:0 | X:1
        //      ----------
        // Y:0  |  0 |  0 |
        // Y:1  | -1 |  0 |
        //      ----------
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y);
        return Builder.CreateSExt(AndNot, VTy);
      }

      //   add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN
      // TODO: Relax the one-use checks because we are removing an instruction?
      if (match(I, m_Add(m_OneUse(m_SExt(m_Value(X))),
                         m_OneUse(m_SExt(m_Value(Y))))) &&
          X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
        // Truth table for inputs and output signbits:
        //       X:0 | X:1
        //      -----------
        // Y:0  | -1 | -1 |
        // Y:1  | -1 |  0 |
        //      -----------
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *Or = Builder.CreateOr(X, Y);
        return Builder.CreateSExt(Or, VTy);
      }
    }
    LLVM_FALLTHROUGH;
  case Instruction::Sub: {
    /// If the high-bits of an ADD/SUB are not demanded, then we do not care
    /// about the high bits of the operands.
    unsigned NLZ = DemandedMask.countLeadingZeros();

    // Right fill the mask of bits for this ADD/SUB to demand the most
    // significant bit and all those below it.
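    // e.g., if only the low 8 bits of an i32 add are demanded, bits 8..31 of
    // both operands cannot influence the result, because carries only
    // propagate upward, so those operand bits need not be demanded.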
    APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
      if (NLZ > 0) {
        // Disable the nsw and nuw flags here: We can no longer guarantee that
        // we won't wrap after simplification. Removing the nsw/nuw flags is
        // legal here because the top bit is not demanded.
        BinaryOperator &BinOP = *cast<BinaryOperator>(I);
        BinOP.setHasNoSignedWrap(false);
        BinOP.setHasNoUnsignedWrap(false);
      }
      return I;
    }

    // If we are known to be adding/subtracting zeros to every bit below
    // the highest demanded bit, we just return the other side.
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    // We can't do this with the LHS for subtraction, unless we are only
    // demanding the LSB.
    if ((I->getOpcode() == Instruction::Add ||
         DemandedFromOps.isOneValue()) &&
        DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // Otherwise just compute the known bits of the result.
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    Known = KnownBits::computeForAddSub(I->getOpcode() == Instruction::Add,
                                        NSW, LHSKnown, RHSKnown);
    break;
  }
  case Instruction::Shl: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      const APInt *ShrAmt;
      if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt))))
        if (Instruction *Shr = dyn_cast<Instruction>(I->getOperand(0)))
          if (Value *R = simplifyShrShlDemandedBits(Shr, *ShrAmt, I, *SA,
                                                    DemandedMask, Known))
            return R;

      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));

      // If the shift is NUW/NSW, then it does demand the high bits.
      ShlOperator *IOp = cast<ShlOperator>(I);
      if (IOp->hasNoSignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt+1);
      else if (IOp->hasNoUnsignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");

      bool SignBitZero = Known.Zero.isSignBitSet();
      bool SignBitOne = Known.One.isSignBitSet();
      Known.Zero <<= ShiftAmt;
      Known.One  <<= ShiftAmt;
      // low bits known zero.
      if (ShiftAmt)
        Known.Zero.setLowBits(ShiftAmt);

      // If this shift has the "nsw" flag, then the result is either a poison
      // value or has the same sign bit as the first operand.
      if (IOp->hasNoSignedWrap()) {
        if (SignBitZero)
          Known.Zero.setSignBit();
        else if (SignBitOne)
          Known.One.setSignBit();
        if (Known.hasConflict())
          return UndefValue::get(I->getType());
      }
    } else {
      computeKnownBits(I, Known, Depth, CxtI);
    }
    break;
  }
  case Instruction::LShr: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);

      // Unsigned shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (cast<LShrOperator>(I)->isExact())
        DemandedMaskIn.setLowBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShiftAmt);
      Known.One.lshrInPlace(ShiftAmt);
      if (ShiftAmt)
        Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
    } else {
      computeKnownBits(I, Known, Depth, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedMask.isOneValue()) {
      // Perform the logical shift right.
      Instruction *NewVal = BinaryOperator::CreateLShr(
          I->getOperand(0), I->getOperand(1), I->getName());
      return InsertNewInstWith(NewVal, *I);
    }

    // If the sign bit is the only bit demanded by this ashr, then there is no
    // need to do it, the shift doesn't change the high bit.
    if (DemandedMask.isSignMask())
      return I->getOperand(0);

    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);

      // Signed shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
      // If any of the high bits are demanded, we should set the sign bit as
      // demanded.
      if (DemandedMask.countLeadingZeros() <= ShiftAmt)
        DemandedMaskIn.setSignBit();

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (cast<AShrOperator>(I)->isExact())
        DemandedMaskIn.setLowBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
        return I;

      unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);

      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      // Compute the new bits that are at the top now plus sign bits.
      APInt HighBits(APInt::getHighBitsSet(
          BitWidth, std::min(SignBits + ShiftAmt - 1, BitWidth)));
      Known.Zero.lshrInPlace(ShiftAmt);
      Known.One.lshrInPlace(ShiftAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      assert(BitWidth > ShiftAmt && "Shift amount not saturated?");
      if (Known.Zero[BitWidth-ShiftAmt-1] ||
          !DemandedMask.intersects(HighBits)) {
        BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
                                                          I->getOperand(1));
        LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
        return InsertNewInstWith(LShr, *I);
      } else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one.
        Known.One |= HighBits;
      }
    } else {
      computeKnownBits(I, Known, Depth, CxtI);
    }
    break;
  }
  case Instruction::UDiv: {
    // UDiv doesn't demand low bits that are zero in the divisor.
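    // e.g., 'udiv i32 %x, 8' is a right shift by 3, so the low 3 bits of %x
    // cannot affect the quotient.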
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      // If the shift is exact, then it does demand the low bits.
      if (cast<UDivOperator>(I)->isExact())
        break;

      // FIXME: Take the demanded mask of the result into account.
      unsigned RHSTrailingZeros = SA->countTrailingZeros();
      APInt DemandedMaskIn =
          APInt::getHighBitsSet(BitWidth, BitWidth - RHSTrailingZeros);
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, LHSKnown, Depth + 1))
        return I;

      // Propagate zero bits from the input.
      Known.Zero.setHighBits(std::min(
          BitWidth, LHSKnown.Zero.countLeadingOnes() + RHSTrailingZeros));
    } else {
      computeKnownBits(I, Known, Depth, CxtI);
    }
    break;
  }
  case Instruction::SRem: {
    ConstantInt *Rem;
    if (match(I->getOperand(1), m_ConstantInt(Rem))) {
      // X % -1 demands all the bits because we don't want to introduce
      // INT_MIN % -1 (== undef) by accident.
      if (Rem->isMinusOne())
        break;
      APInt RA = Rem->getValue().abs();
      if (RA.isPowerOf2()) {
        if (DemandedMask.ult(RA)) // srem won't affect demanded bits
          return I->getOperand(0);

        APInt LowBits = RA - 1;
        APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
        if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1))
          return I;

        // The low bits of LHS are unchanged by the srem.
        Known.Zero = LHSKnown.Zero & LowBits;
        Known.One = LHSKnown.One & LowBits;

        // If LHS is non-negative or has all low bits zero, then the upper bits
        // are all zero.
        if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero))
          Known.Zero |= ~LowBits;

        // If LHS is negative and not all low bits are zero, then the upper bits
        // are all one.
        if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One))
          Known.One |= ~LowBits;

        assert(!Known.hasConflict() && "Bits known to be one AND zero?");
        break;
      }
    }

    // The sign bit is the LHS's sign bit, except when the result of the
    // remainder is zero.
    if (DemandedMask.isSignBitSet()) {
      computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
      // If it's known zero, our sign bit is also zero.
      if (LHSKnown.isNonNegative())
        Known.makeNonNegative();
    }
    break;
  }
  case Instruction::URem: {
    KnownBits Known2(BitWidth);
    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
    if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) ||
        SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
      return I;
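    // The remainder is never larger than the divisor, so it inherits at least
    // the divisor's known leading zero bits.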
    unsigned Leaders = Known2.countMinLeadingZeros();
    Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
    break;
  }
  case Instruction::Call: {
    bool KnownBitsComputed = false;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::bswap: {
        // If the only bits demanded come from one byte of the bswap result,
        // just shift the input byte into position to eliminate the bswap.
        unsigned NLZ = DemandedMask.countLeadingZeros();
        unsigned NTZ = DemandedMask.countTrailingZeros();

        // Round NTZ down to the next byte. If we have 11 trailing zeros, then
        // we need all the bits down to bit 8. Likewise, round NLZ. If we
        // have 14 leading zeros, round to 8.
        NLZ &= ~7;
        NTZ &= ~7;
        // If we need exactly one byte, we can do this transformation.
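        // e.g., for an i32 bswap with DemandedMask == 0x0000ff00 (NLZ == 16,
        // NTZ == 8), the demanded byte comes from bits 16..23 of the input,
        // so a lshr by 8 replaces the bswap.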
        if (BitWidth-NLZ-NTZ == 8) {
          unsigned ResultBit = NTZ;
          unsigned InputBit = BitWidth-NTZ-8;

          // Replace this with either a left or right shift to get the byte into
          // the right place.
          Instruction *NewVal;
          if (InputBit > ResultBit)
            NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
                    ConstantInt::get(I->getType(), InputBit-ResultBit));
          else
            NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
                    ConstantInt::get(I->getType(), ResultBit-InputBit));
          NewVal->takeName(I);
          return InsertNewInstWith(NewVal, *I);
        }
        break;
      }
      case Intrinsic::fshr:
      case Intrinsic::fshl: {
        const APInt *SA;
        if (!match(I->getOperand(2), m_APInt(SA)))
          break;

        // Normalize to funnel shift left. APInt shifts of BitWidth are well-
        // defined, so no need to special-case zero shifts here.
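        // e.g., on i8, fshr(X, Y, 3) is equivalent to fshl(X, Y, 5).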
        uint64_t ShiftAmt = SA->urem(BitWidth);
        if (II->getIntrinsicID() == Intrinsic::fshr)
          ShiftAmt = BitWidth - ShiftAmt;

        APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
        APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
        if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
            SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
          return I;

        Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
                     RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
        Known.One = LHSKnown.One.shl(ShiftAmt) |
                    RHSKnown.One.lshr(BitWidth - ShiftAmt);
        KnownBitsComputed = true;
        break;
      }
      default: {
        // Handle target specific intrinsics
        Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
            *II, DemandedMask, Known, KnownBitsComputed);
        if (V.hasValue())
          return V.getValue();
        break;
      }
      }
    }

    if (!KnownBitsComputed)
      computeKnownBits(V, Known, Depth, CxtI);
    break;
  }
  }

  // If the client is only demanding bits that we know, return the known
  // constant.
  if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
    return Constant::getIntegerValue(VTy, Known.One);
  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It computes Known
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
    Instruction *I, const APInt &DemandedMask, KnownBits &Known, unsigned Depth,
    Instruction *CxtI) {
  unsigned BitWidth = DemandedMask.getBitWidth();
  Type *ITy = I->getType();

  KnownBits LHSKnown(BitWidth);
  KnownBits RHSKnown(BitWidth);

  // Despite the fact that we can't simplify this instruction in every user's
  // context, we can at least compute the known bits, and we can do
  // simplifications that apply to *just* the one user if we know that this
  // instruction has a simpler value in that context.
  switch (I->getOpcode()) {
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
    Known = LHSKnown & RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this context.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    break;
  }
  case Instruction::Or: {
    // We can simplify (X|Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If either the LHS or the RHS are One, the result is One.
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
    Known = LHSKnown | RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::Xor: {
    // We can simplify (X^Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
    computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
    Known = LHSKnown ^ RHSKnown;

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::AShr: {
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If the right shift operand 0 is a result of a left shift by the same
    // amount, this is probably a zero/sign extension, which may be unnecessary,
    // if we do not demand any of the new sign bits. So, return the original
    // operand instead.
    const APInt *ShiftRC;
    const APInt *ShiftLC;
    Value *X;
    unsigned BitWidth = DemandedMask.getBitWidth();
    if (match(I,
              m_AShr(m_Shl(m_Value(X), m_APInt(ShiftLC)), m_APInt(ShiftRC))) &&
        ShiftLC == ShiftRC &&
        DemandedMask.isSubsetOf(APInt::getLowBitsSet(
            BitWidth, BitWidth - ShiftRC->getZExtValue()))) {
      return X;
    }

    break;
  }
  default:
    // Compute the Known bits to simplify things downstream.
    computeKnownBits(I, Known, Depth, CxtI);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    break;
  }

  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
/// of "C2 - C1".
///
/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
/// ..., bn}, without considering the specific value X is holding.
/// This transformation is legal iff one of following conditions holds:
///  1) All the bits in S are 0, in this case E1 == E2.
///  2) We don't care about those bits in S, per the input DemandedMask.
///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care
///     about the rest of the bits.
///
/// Currently we only test condition 2).
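/// e.g., (X lshr 1) << 3 can be rewritten as X << 2 when bit 2 of the result
/// is not demanded, since the two expressions agree on every other bit.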
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
Value *InstCombinerImpl::simplifyShrShlDemandedBits(
    Instruction *Shr, const APInt &ShrOp1, Instruction *Shl,
    const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) {
  if (!ShlOp1 || !ShrOp1)
    return nullptr; // No-op.

  Value *VarX = Shr->getOperand(0);
  Type *Ty = VarX->getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    return nullptr; // Undef.

  unsigned ShlAmt = ShlOp1.getZExtValue();
  unsigned ShrAmt = ShrOp1.getZExtValue();

  Known.One.clearAllBits();
  Known.Zero.setLowBits(ShlAmt - 1);
  Known.Zero &= DemandedMask;

  APInt BitMask1(APInt::getAllOnesValue(BitWidth));
  APInt BitMask2(APInt::getAllOnesValue(BitWidth));

  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
                      (BitMask1.ashr(ShrAmt) << ShlAmt);

  if (ShrAmt <= ShlAmt) {
    BitMask2 <<= (ShlAmt - ShrAmt);
  } else {
    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
                        BitMask2.ashr(ShrAmt - ShlAmt);
  }

  // Check if condition-2 (see the comment to this function) is satisfied.
  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    if (ShrAmt == ShlAmt)
      return VarX;

    if (!Shr->hasOneUse())
      return nullptr;

    BinaryOperator *New;
    if (ShrAmt < ShlAmt) {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
      New = BinaryOperator::CreateShl(VarX, Amt);
      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    } else {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
                     BinaryOperator::CreateAShr(VarX, Amt);
      if (cast<BinaryOperator>(Shr)->isExact())
        New->setIsExact(true);
    }

    return InsertNewInstWith(New, *Shl);
  }

  return nullptr;
}

/// The specified value produces a vector with any number of elements.
/// This method analyzes which elements of the operand are undef or poison and
/// returns that information in UndefElts.
///
/// DemandedElts contains the set of elements that are actually used by the
/// caller, and by default (AllowMultipleUsers equals false) the value is
/// simplified only if it has a single user. If AllowMultipleUsers is set
/// to true, DemandedElts refers to the union of sets of elements that are
/// used by all users.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified, then the resultant value is
/// returned. This returns null if no change was made.
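/// e.g., if only lane 0 of a constant vector is demanded, the other lanes are
/// rewritten to poison and reported back in UndefElts.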
Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
                                                    APInt DemandedElts,
                                                    APInt &UndefElts,
                                                    unsigned Depth,
                                                    bool AllowMultipleUsers) {
  // Cannot analyze scalable type. The number of vector elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(V->getType()))
    return nullptr;

  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
  APInt EltMask(APInt::getAllOnesValue(VWidth));
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");

  if (isa<UndefValue>(V)) {
    // If the entire vector is undef or poison, just return this info.
    UndefElts = EltMask;
    return nullptr;
  }

  if (DemandedElts.isNullValue()) { // If nothing is demanded, provide poison.
    UndefElts = EltMask;
    return PoisonValue::get(V->getType());
  }

  UndefElts = 0;

  if (auto *C = dyn_cast<Constant>(V)) {
    // Check if this is identity. If so, return 0 since we are not simplifying
    // anything.
    if (DemandedElts.isAllOnesValue())
      return nullptr;

    Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    Constant *Poison = PoisonValue::get(EltTy);
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0; i != VWidth; ++i) {
      if (!DemandedElts[i]) { // If not demanded, set to poison.
        Elts.push_back(Poison);
        UndefElts.setBit(i);
        continue;
      }

      Constant *Elt = C->getAggregateElement(i);
      if (!Elt) return nullptr;

      Elts.push_back(Elt);
      if (isa<UndefValue>(Elt)) // Already undef or poison.
        UndefElts.setBit(i);
    }

    // If we changed the constant, return it.
    Constant *NewCV = ConstantVector::get(Elts);
    return NewCV != C ? NewCV : nullptr;
  }

  // Limit search depth.
  if (Depth == 10)
    return nullptr;

  if (!AllowMultipleUsers) {
    // If multiple users are using the root value, proceed with
    // simplification conservatively assuming that all elements
    // are needed.
    if (!V->hasOneUse()) {
      // Quit if we find multiple users of a non-root value though.
      // They'll be handled when it's their turn to be visited by
      // the main instcombine process.
      if (Depth != 0)
        // TODO: Just compute the UndefElts information recursively.
        return nullptr;

      // Conservatively assume that all elements are needed.
      DemandedElts = EltMask;
    }
  }

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return nullptr; // Only analyze instructions.

  bool MadeChange = false;
  auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
                              APInt Demanded, APInt &Undef) {
    auto *II = dyn_cast<IntrinsicInst>(Inst);
    Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
    if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
      replaceOperand(*Inst, OpNum, V);
      MadeChange = true;
    }
  };

  APInt UndefElts2(VWidth, 0);
  APInt UndefElts3(VWidth, 0);
  switch (I->getOpcode()) {
  default: break;

  case Instruction::GetElementPtr: {
    // The LangRef requires that struct geps have all constant indices. As
    // such, we can't convert any operand to partial undef.
    auto mayIndexStructType = [](GetElementPtrInst &GEP) {
      for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
           I != E; I++)
        if (I.isStruct())
          return true;
      return false;
    };
    if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
      break;

    // Conservatively track the demanded elements back through any vector
    // operands we may have. We know there must be at least one, or we
    // wouldn't have a vector result to get here. Note that we intentionally
    // merge the undef bits here since gepping with either an undef base or
    // index results in undef.
    for (unsigned i = 0; i < I->getNumOperands(); i++) {
      if (isa<UndefValue>(I->getOperand(i))) {
        // If the entire vector is undefined, just return this info.
        UndefElts = EltMask;
        return nullptr;
      }
      if (I->getOperand(i)->getType()->isVectorTy()) {
        APInt UndefEltsOp(VWidth, 0);
        simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
        UndefElts |= UndefEltsOp;
      }
    }

    break;
  }
  case Instruction::InsertElement: {
    // If this is a variable index, we don't know which element it overwrites,
    // so demand exactly the same input as we produce.
    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
    if (!Idx) {
      // Note that we can't propagate undef elt info, because we don't know
      // which elt is getting updated.
      simplifyAndSetOp(I, 0, DemandedElts, UndefElts2);
      break;
    }

    // The element inserted overwrites whatever was there, so the input demanded
    // set is simpler than the output set.
    unsigned IdxNo = Idx->getZExtValue();
    APInt PreInsertDemandedElts = DemandedElts;
    if (IdxNo < VWidth)
      PreInsertDemandedElts.clearBit(IdxNo);

    // If we only demand the element that is being inserted and that element
    // was extracted from the same index in another vector with the same type,
    // replace this insert with that other vector.
    // Note: This is attempted before the call to simplifyAndSetOp because that
    // may change UndefElts to a value that does not match with Vec.
    Value *Vec;
    if (PreInsertDemandedElts == 0 &&
        match(I->getOperand(1),
              m_ExtractElt(m_Value(Vec), m_SpecificInt(IdxNo))) &&
        Vec->getType() == I->getType()) {
      return Vec;
    }

    simplifyAndSetOp(I, 0, PreInsertDemandedElts, UndefElts);

    // If this is inserting an element that isn't demanded, remove this
    // insertelement.
    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
      Worklist.push(I);
      return I->getOperand(0);
    }

    // The inserted element is defined.
    UndefElts.clearBit(IdxNo);
    break;
  }
  1062. case Instruction::ShuffleVector: {
  1063. auto *Shuffle = cast<ShuffleVectorInst>(I);
  1064. assert(Shuffle->getOperand(0)->getType() ==
  1065. Shuffle->getOperand(1)->getType() &&
  1066. "Expected shuffle operands to have same type");
  1067. unsigned OpWidth = cast<FixedVectorType>(Shuffle->getOperand(0)->getType())
  1068. ->getNumElements();
  1069. // Handle trivial case of a splat. Only check the first element of LHS
  1070. // operand.
  1071. if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) &&
  1072. DemandedElts.isAllOnesValue()) {
  1073. if (!isa<UndefValue>(I->getOperand(1))) {
  1074. I->setOperand(1, UndefValue::get(I->getOperand(1)->getType()));
  1075. MadeChange = true;
  1076. }
  1077. APInt LeftDemanded(OpWidth, 1);
  1078. APInt LHSUndefElts(OpWidth, 0);
  1079. simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
  1080. if (LHSUndefElts[0])
  1081. UndefElts = EltMask;
  1082. else
  1083. UndefElts.clearAllBits();
  1084. break;
  1085. }
  1086. APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0);
  1087. for (unsigned i = 0; i < VWidth; i++) {
  1088. if (DemandedElts[i]) {
  1089. unsigned MaskVal = Shuffle->getMaskValue(i);
  1090. if (MaskVal != -1u) {
  1091. assert(MaskVal < OpWidth * 2 &&
  1092. "shufflevector mask index out of range!");
  1093. if (MaskVal < OpWidth)
  1094. LeftDemanded.setBit(MaskVal);
  1095. else
  1096. RightDemanded.setBit(MaskVal - OpWidth);
  1097. }
  1098. }
  1099. }
  1100. APInt LHSUndefElts(OpWidth, 0);
  1101. simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
  1102. APInt RHSUndefElts(OpWidth, 0);
  1103. simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);
  1104. // If this shuffle does not change the vector length and the elements
  1105. // demanded by this shuffle are an identity mask, then this shuffle is
  1106. // unnecessary.
  1107. //
  1108. // We are assuming canonical form for the mask, so the source vector is
  1109. // operand 0 and operand 1 is not used.
  1110. //
  1111. // Note that if an element is demanded and this shuffle mask is undefined
  1112. // for that element, then the shuffle is not considered an identity
  1113. // operation. The shuffle prevents poison from the operand vector from
  1114. // leaking to the result by replacing poison with an undefined value.
    if (VWidth == OpWidth) {
      bool IsIdentityShuffle = true;
      for (unsigned i = 0; i < VWidth; i++) {
        unsigned MaskVal = Shuffle->getMaskValue(i);
        if (DemandedElts[i] && i != MaskVal) {
          IsIdentityShuffle = false;
          break;
        }
      }
      if (IsIdentityShuffle)
        return Shuffle->getOperand(0);
    }
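    // Scan the mask again to propagate newly discovered undefs and to find a
    // single live element per operand. LHSIdx/RHSIdx and LHSValIdx/RHSValIdx
    // use -1u for "none seen yet" and saturate to OpWidth once more than one
    // live element has been found.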
    bool NewUndefElts = false;
    unsigned LHSIdx = -1u, LHSValIdx = -1u;
    unsigned RHSIdx = -1u, RHSValIdx = -1u;
    bool LHSUniform = true;
    bool RHSUniform = true;
    for (unsigned i = 0; i < VWidth; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u) {
        UndefElts.setBit(i);
      } else if (!DemandedElts[i]) {
        NewUndefElts = true;
        UndefElts.setBit(i);
      } else if (MaskVal < OpWidth) {
        if (LHSUndefElts[MaskVal]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          LHSIdx = LHSIdx == -1u ? i : OpWidth;
          LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
          LHSUniform = LHSUniform && (MaskVal == i);
        }
      } else {
        if (RHSUndefElts[MaskVal - OpWidth]) {
          NewUndefElts = true;
          UndefElts.setBit(i);
        } else {
          RHSIdx = RHSIdx == -1u ? i : OpWidth;
          RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
          RHSUniform = RHSUniform && (MaskVal - OpWidth == i);
        }
      }
    }

    // Try to transform a shuffle that takes exactly one element from a
    // constant vector into a single insertelement instruction:
    // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
    // insertelement V, C[ci], ci-n
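    // For example (illustrative):
    //   shufflevector <2 x i32> %v, <2 x i32> <i32 7, i32 8>, <2 x i32> <i32 0, i32 3>
    // --> insertelement <2 x i32> %v, i32 8, i32 1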
    if (OpWidth ==
        cast<FixedVectorType>(Shuffle->getType())->getNumElements()) {
      Value *Op = nullptr;
      Constant *Value = nullptr;
      unsigned Idx = -1u;

      // Find constant vector with the single element in shuffle (LHS or RHS).
      if (LHSIdx < OpWidth && RHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
          Op = Shuffle->getOperand(1);
          Value = CV->getOperand(LHSValIdx);
          Idx = LHSIdx;
        }
      }
      if (RHSIdx < OpWidth && LHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
          Op = Shuffle->getOperand(0);
          Value = CV->getOperand(RHSValIdx);
          Idx = RHSIdx;
        }
      }
      // Found constant vector with single element - convert to insertelement.
      if (Op && Value) {
        Instruction *New = InsertElementInst::Create(
            Op, Value,
            ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
            Shuffle->getName());
        InsertNewInstWith(New, *Shuffle);
        return New;
      }
    }
    if (NewUndefElts) {
      // Add additional discovered undefs.
      SmallVector<int, 16> Elts;
      for (unsigned i = 0; i < VWidth; ++i) {
        if (UndefElts[i])
          Elts.push_back(UndefMaskElem);
        else
          Elts.push_back(Shuffle->getMaskValue(i));
      }
      Shuffle->setShuffleMask(Elts);
      MadeChange = true;
    }
    break;
  }
  case Instruction::Select: {
    // If this is a vector select, try to transform the select condition based
    // on the current demanded elements.
    SelectInst *Sel = cast<SelectInst>(I);
    if (Sel->getCondition()->getType()->isVectorTy()) {
      // TODO: We are not doing anything with UndefElts based on this call.
      // It is overwritten below based on the other select operands. If an
      // element of the select condition is known undef, then we are free to
      // choose the output value from either arm of the select. If we know that
      // one of those values is undef, then the output can be undef.
      simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    }

    // Next, see if we can transform the arms of the select.
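    // If the condition is a known constant vector, each lane selects exactly
    // one arm; e.g. with condition <i1 true, i1 false>, lane 0 only demands
    // the true arm and lane 1 only demands the false arm.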
    APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
    if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
      for (unsigned i = 0; i < VWidth; i++) {
        // isNullValue() always returns false when called on a ConstantExpr.
        // Skip constant expressions to avoid propagating incorrect information.
        Constant *CElt = CV->getAggregateElement(i);
        if (isa<ConstantExpr>(CElt))
          continue;
        // TODO: If a select condition element is undef, we can demand from
        // either side. If one side is known undef, choosing that side would
        // propagate undef.
        if (CElt->isNullValue())
          DemandedLHS.clearBit(i);
        else
          DemandedRHS.clearBit(i);
      }
    }

    simplifyAndSetOp(I, 1, DemandedLHS, UndefElts2);
    simplifyAndSetOp(I, 2, DemandedRHS, UndefElts3);

    // Output elements are undefined if the element from each arm is undefined.
    // TODO: This can be improved. See comment in select condition handling.
    UndefElts = UndefElts2 & UndefElts3;
    break;
  }
  case Instruction::BitCast: {
    // Vector->vector casts only.
    VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
    if (!VTy) break;
    unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements();
    APInt InputDemandedElts(InVWidth, 0);
    UndefElts2 = APInt(InVWidth, 0);
    unsigned Ratio;

    if (VWidth == InVWidth) {
      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
      // elements as are demanded of us.
      Ratio = 1;
      InputDemandedElts = DemandedElts;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an input element is live if any of the
      // corresponding output elements are live.
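      // E.g. for bitcast <2 x i64> %x to <4 x i32>, Ratio = 2 and output
      // lanes {2*i, 2*i+1} both map onto input lane i.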
      Ratio = VWidth / InVWidth;
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (DemandedElts[OutIdx])
          InputDemandedElts.setBit(OutIdx / Ratio);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an input element is live if the
      // corresponding output element is live.
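      // E.g. for bitcast <4 x i32> %x to <2 x i64>, Ratio = 2 and input
      // lanes {2*i, 2*i+1} are live iff output lane i is demanded.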
      Ratio = InVWidth / VWidth;
      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
        if (DemandedElts[InIdx / Ratio])
          InputDemandedElts.setBit(InIdx);
    } else {
      // Unsupported so far.
      break;
    }

    simplifyAndSetOp(I, 0, InputDemandedElts, UndefElts2);

    if (VWidth == InVWidth) {
      UndefElts = UndefElts2;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an output element is undef if the
      // corresponding input element is undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (UndefElts2[OutIdx / Ratio])
          UndefElts.setBit(OutIdx);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an output element is undef if all of the
      // corresponding input elements are undef.
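      // E.g. for <4 x i32> -> <2 x i64>, output lane i is undef only if input
      // lanes 2*i and 2*i+1 are both undef.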
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
        APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
        if (SubUndef.countPopulation() == Ratio)
          UndefElts.setBit(OutIdx);
      }
    } else {
      llvm_unreachable("Unimp");
    }
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
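    // These casts are lane-wise: demanded and undef lanes pass straight
    // through to and from the operand.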
    simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    break;

  case Instruction::Call: {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
    if (!II) break;
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_gather: // fallthrough
    case Intrinsic::masked_load: {
      // Subtlety: If we load from a pointer, the pointer must be valid
      // regardless of whether the element is demanded. Doing otherwise risks
      // segfaults which didn't exist in the original program.
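      // A known-zero mask lane never loads, so its pointer is not demanded; a
      // known-all-ones lane never reads the passthrough operand.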
      APInt DemandedPtrs(APInt::getAllOnesValue(VWidth)),
          DemandedPassThrough(DemandedElts);
      if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
        for (unsigned i = 0; i < VWidth; i++) {
          Constant *CElt = CV->getAggregateElement(i);
          if (CElt->isNullValue())
            DemandedPtrs.clearBit(i);
          else if (CElt->isAllOnesValue())
            DemandedPassThrough.clearBit(i);
        }
      if (II->getIntrinsicID() == Intrinsic::masked_gather)
        simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2);
      simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3);

      // Output elements are undefined if the elements from both sources are.
      // TODO: can strengthen via mask as well.
      UndefElts = UndefElts2 & UndefElts3;
      break;
    }
    default: {
      // Handle target specific intrinsics
      Optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
          *II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
          simplifyAndSetOp);
      if (V.hasValue())
        return V.getValue();
      break;
    }
    } // switch on IntrinsicID
    break;
  } // case Call
  } // switch on Opcode

  // TODO: We bail completely on integer div/rem and shifts because they have
  // UB/poison potential, but that should be refined.
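  // For the remaining simple binary operators, each result lane depends only
  // on the same lane of each input, so demanded elements propagate directly
  // to both operands.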
  BinaryOperator *BO;
  if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
    simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);

    // Any change to an instruction with potential poison must clear those
    // flags because we cannot guarantee those constraints now. Other analysis
    // may determine that it is safe to re-apply the flags.
    if (MadeChange)
      BO->dropPoisonGeneratingFlags();

    // Output elements are undefined only if both inputs are undefined.
    // Consider things like undef & 0. The result is known zero, not undef.
    UndefElts &= UndefElts2;
  }

  // If we've proven all of the lanes undef, return an undef value.
  // TODO: Intersect w/demanded lanes
  if (UndefElts.isAllOnesValue())
    return UndefValue::get(I->getType());

  return MadeChange ? I : nullptr;
}