123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626 |
- //===- DemandedBits.cpp - Determine demanded bits -------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This pass implements a demanded bits analysis. A demanded bit is one that
- // contributes to a result; bits that are not demanded can be either zero or
- // one without affecting control or data flow. For example in this sequence:
- //
- // %1 = add i32 %x, %y
- // %2 = trunc i32 %1 to i16
- //
- // Only the lowest 16 bits of %1 are demanded; the rest are removed by the
- // trunc.
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/Analysis/DemandedBits.h"
- #include "llvm/ADT/APInt.h"
- #include "llvm/ADT/SetVector.h"
- #include "llvm/Analysis/AssumptionCache.h"
- #include "llvm/Analysis/ValueTracking.h"
- #include "llvm/IR/DataLayout.h"
- #include "llvm/IR/Dominators.h"
- #include "llvm/IR/InstIterator.h"
- #include "llvm/IR/Instruction.h"
- #include "llvm/IR/IntrinsicInst.h"
- #include "llvm/IR/Module.h"
- #include "llvm/IR/Operator.h"
- #include "llvm/IR/PassManager.h"
- #include "llvm/IR/PatternMatch.h"
- #include "llvm/IR/Type.h"
- #include "llvm/IR/Use.h"
- #include "llvm/InitializePasses.h"
- #include "llvm/Pass.h"
- #include "llvm/Support/Casting.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/KnownBits.h"
- #include "llvm/Support/raw_ostream.h"
- #include <algorithm>
- #include <cstdint>
- using namespace llvm;
- using namespace llvm::PatternMatch;
- #define DEBUG_TYPE "demanded-bits"
- char DemandedBitsWrapperPass::ID = 0;
- INITIALIZE_PASS_BEGIN(DemandedBitsWrapperPass, "demanded-bits",
- "Demanded bits analysis", false, false)
- INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
- INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
- INITIALIZE_PASS_END(DemandedBitsWrapperPass, "demanded-bits",
- "Demanded bits analysis", false, false)
- DemandedBitsWrapperPass::DemandedBitsWrapperPass() : FunctionPass(ID) {
- initializeDemandedBitsWrapperPassPass(*PassRegistry::getPassRegistry());
- }
- void DemandedBitsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.setPreservesAll();
- }
- void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const {
- DB->print(OS);
- }
- static bool isAlwaysLive(Instruction *I) {
- return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() ||
- I->mayHaveSideEffects();
- }
- void DemandedBits::determineLiveOperandBits(
- const Instruction *UserI, const Value *Val, unsigned OperandNo,
- const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2,
- bool &KnownBitsComputed) {
- unsigned BitWidth = AB.getBitWidth();
- // We're called once per operand, but for some instructions, we need to
- // compute known bits of both operands in order to determine the live bits of
- // either (when both operands are instructions themselves). We don't,
- // however, want to do this twice, so we cache the result in APInts that live
- // in the caller. For the two-relevant-operands case, both operand values are
- // provided here.
- auto ComputeKnownBits =
- [&](unsigned BitWidth, const Value *V1, const Value *V2) {
- if (KnownBitsComputed)
- return;
- KnownBitsComputed = true;
- const DataLayout &DL = UserI->getModule()->getDataLayout();
- Known = KnownBits(BitWidth);
- computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT);
- if (V2) {
- Known2 = KnownBits(BitWidth);
- computeKnownBits(V2, Known2, DL, 0, &AC, UserI, &DT);
- }
- };
- switch (UserI->getOpcode()) {
- default: break;
- case Instruction::Call:
- case Instruction::Invoke:
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::bswap:
- // The alive bits of the input are the swapped alive bits of
- // the output.
- AB = AOut.byteSwap();
- break;
- case Intrinsic::bitreverse:
- // The alive bits of the input are the reversed alive bits of
- // the output.
- AB = AOut.reverseBits();
- break;
- case Intrinsic::ctlz:
- if (OperandNo == 0) {
- // We need some output bits, so we need all bits of the
- // input to the left of, and including, the leftmost bit
- // known to be one.
- ComputeKnownBits(BitWidth, Val, nullptr);
- AB = APInt::getHighBitsSet(BitWidth,
- std::min(BitWidth, Known.countMaxLeadingZeros()+1));
- }
- break;
- case Intrinsic::cttz:
- if (OperandNo == 0) {
- // We need some output bits, so we need all bits of the
- // input to the right of, and including, the rightmost bit
- // known to be one.
- ComputeKnownBits(BitWidth, Val, nullptr);
- AB = APInt::getLowBitsSet(BitWidth,
- std::min(BitWidth, Known.countMaxTrailingZeros()+1));
- }
- break;
- case Intrinsic::fshl:
- case Intrinsic::fshr: {
- const APInt *SA;
- if (OperandNo == 2) {
- // Shift amount is modulo the bitwidth. For powers of two we have
- // SA % BW == SA & (BW - 1).
- if (isPowerOf2_32(BitWidth))
- AB = BitWidth - 1;
- } else if (match(II->getOperand(2), m_APInt(SA))) {
- // Normalize to funnel shift left. APInt shifts of BitWidth are well-
- // defined, so no need to special-case zero shifts here.
- uint64_t ShiftAmt = SA->urem(BitWidth);
- if (II->getIntrinsicID() == Intrinsic::fshr)
- ShiftAmt = BitWidth - ShiftAmt;
- if (OperandNo == 0)
- AB = AOut.lshr(ShiftAmt);
- else if (OperandNo == 1)
- AB = AOut.shl(BitWidth - ShiftAmt);
- }
- break;
- }
- case Intrinsic::umax:
- case Intrinsic::umin:
- case Intrinsic::smax:
- case Intrinsic::smin:
- // If low bits of result are not demanded, they are also not demanded
- // for the min/max operands.
- AB = APInt::getBitsSetFrom(BitWidth, AOut.countTrailingZeros());
- break;
- }
- }
- break;
- case Instruction::Add:
- if (AOut.isMask()) {
- AB = AOut;
- } else {
- ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
- AB = determineLiveOperandBitsAdd(OperandNo, AOut, Known, Known2);
- }
- break;
- case Instruction::Sub:
- if (AOut.isMask()) {
- AB = AOut;
- } else {
- ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
- AB = determineLiveOperandBitsSub(OperandNo, AOut, Known, Known2);
- }
- break;
- case Instruction::Mul:
- // Find the highest live output bit. We don't need any more input
- // bits than that (adds, and thus subtracts, ripple only to the
- // left).
- AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
- break;
- case Instruction::Shl:
- if (OperandNo == 0) {
- const APInt *ShiftAmtC;
- if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
- uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
- AB = AOut.lshr(ShiftAmt);
- // If the shift is nuw/nsw, then the high bits are not dead
- // (because we've promised that they *must* be zero).
- const ShlOperator *S = cast<ShlOperator>(UserI);
- if (S->hasNoSignedWrap())
- AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
- else if (S->hasNoUnsignedWrap())
- AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- }
- }
- break;
- case Instruction::LShr:
- if (OperandNo == 0) {
- const APInt *ShiftAmtC;
- if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
- uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
- AB = AOut.shl(ShiftAmt);
- // If the shift is exact, then the low bits are not dead
- // (they must be zero).
- if (cast<LShrOperator>(UserI)->isExact())
- AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- }
- }
- break;
- case Instruction::AShr:
- if (OperandNo == 0) {
- const APInt *ShiftAmtC;
- if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
- uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
- AB = AOut.shl(ShiftAmt);
- // Because the high input bit is replicated into the
- // high-order bits of the result, if we need any of those
- // bits, then we must keep the highest input bit.
- if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
- .getBoolValue())
- AB.setSignBit();
- // If the shift is exact, then the low bits are not dead
- // (they must be zero).
- if (cast<AShrOperator>(UserI)->isExact())
- AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- }
- }
- break;
- case Instruction::And:
- AB = AOut;
- // For bits that are known zero, the corresponding bits in the
- // other operand are dead (unless they're both zero, in which
- // case they can't both be dead, so just mark the LHS bits as
- // dead).
- ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
- if (OperandNo == 0)
- AB &= ~Known2.Zero;
- else
- AB &= ~(Known.Zero & ~Known2.Zero);
- break;
- case Instruction::Or:
- AB = AOut;
- // For bits that are known one, the corresponding bits in the
- // other operand are dead (unless they're both one, in which
- // case they can't both be dead, so just mark the LHS bits as
- // dead).
- ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
- if (OperandNo == 0)
- AB &= ~Known2.One;
- else
- AB &= ~(Known.One & ~Known2.One);
- break;
- case Instruction::Xor:
- case Instruction::PHI:
- AB = AOut;
- break;
- case Instruction::Trunc:
- AB = AOut.zext(BitWidth);
- break;
- case Instruction::ZExt:
- AB = AOut.trunc(BitWidth);
- break;
- case Instruction::SExt:
- AB = AOut.trunc(BitWidth);
- // Because the high input bit is replicated into the
- // high-order bits of the result, if we need any of those
- // bits, then we must keep the highest input bit.
- if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
- AOut.getBitWidth() - BitWidth))
- .getBoolValue())
- AB.setSignBit();
- break;
- case Instruction::Select:
- if (OperandNo != 0)
- AB = AOut;
- break;
- case Instruction::ExtractElement:
- if (OperandNo == 0)
- AB = AOut;
- break;
- case Instruction::InsertElement:
- case Instruction::ShuffleVector:
- if (OperandNo == 0 || OperandNo == 1)
- AB = AOut;
- break;
- }
- }
- bool DemandedBitsWrapperPass::runOnFunction(Function &F) {
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DB.emplace(F, AC, DT);
- return false;
- }
- void DemandedBitsWrapperPass::releaseMemory() {
- DB.reset();
- }
- void DemandedBits::performAnalysis() {
- if (Analyzed)
- // Analysis already completed for this function.
- return;
- Analyzed = true;
- Visited.clear();
- AliveBits.clear();
- DeadUses.clear();
- SmallSetVector<Instruction*, 16> Worklist;
- // Collect the set of "root" instructions that are known live.
- for (Instruction &I : instructions(F)) {
- if (!isAlwaysLive(&I))
- continue;
- LLVM_DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n");
- // For integer-valued instructions, set up an initial empty set of alive
- // bits and add the instruction to the work list. For other instructions
- // add their operands to the work list (for integer values operands, mark
- // all bits as live).
- Type *T = I.getType();
- if (T->isIntOrIntVectorTy()) {
- if (AliveBits.try_emplace(&I, T->getScalarSizeInBits(), 0).second)
- Worklist.insert(&I);
- continue;
- }
- // Non-integer-typed instructions...
- for (Use &OI : I.operands()) {
- if (Instruction *J = dyn_cast<Instruction>(OI)) {
- Type *T = J->getType();
- if (T->isIntOrIntVectorTy())
- AliveBits[J] = APInt::getAllOnes(T->getScalarSizeInBits());
- else
- Visited.insert(J);
- Worklist.insert(J);
- }
- }
- // To save memory, we don't add I to the Visited set here. Instead, we
- // check isAlwaysLive on every instruction when searching for dead
- // instructions later (we need to check isAlwaysLive for the
- // integer-typed instructions anyway).
- }
- // Propagate liveness backwards to operands.
- while (!Worklist.empty()) {
- Instruction *UserI = Worklist.pop_back_val();
- LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
- APInt AOut;
- bool InputIsKnownDead = false;
- if (UserI->getType()->isIntOrIntVectorTy()) {
- AOut = AliveBits[UserI];
- LLVM_DEBUG(dbgs() << " Alive Out: 0x"
- << Twine::utohexstr(AOut.getLimitedValue()));
- // If all bits of the output are dead, then all bits of the input
- // are also dead.
- InputIsKnownDead = !AOut && !isAlwaysLive(UserI);
- }
- LLVM_DEBUG(dbgs() << "\n");
- KnownBits Known, Known2;
- bool KnownBitsComputed = false;
- // Compute the set of alive bits for each operand. These are anded into the
- // existing set, if any, and if that changes the set of alive bits, the
- // operand is added to the work-list.
- for (Use &OI : UserI->operands()) {
- // We also want to detect dead uses of arguments, but will only store
- // demanded bits for instructions.
- Instruction *I = dyn_cast<Instruction>(OI);
- if (!I && !isa<Argument>(OI))
- continue;
- Type *T = OI->getType();
- if (T->isIntOrIntVectorTy()) {
- unsigned BitWidth = T->getScalarSizeInBits();
- APInt AB = APInt::getAllOnes(BitWidth);
- if (InputIsKnownDead) {
- AB = APInt(BitWidth, 0);
- } else {
- // Bits of each operand that are used to compute alive bits of the
- // output are alive, all others are dead.
- determineLiveOperandBits(UserI, OI, OI.getOperandNo(), AOut, AB,
- Known, Known2, KnownBitsComputed);
- // Keep track of uses which have no demanded bits.
- if (AB.isZero())
- DeadUses.insert(&OI);
- else
- DeadUses.erase(&OI);
- }
- if (I) {
- // If we've added to the set of alive bits (or the operand has not
- // been previously visited), then re-queue the operand to be visited
- // again.
- auto Res = AliveBits.try_emplace(I);
- if (Res.second || (AB |= Res.first->second) != Res.first->second) {
- Res.first->second = std::move(AB);
- Worklist.insert(I);
- }
- }
- } else if (I && Visited.insert(I).second) {
- Worklist.insert(I);
- }
- }
- }
- }
- APInt DemandedBits::getDemandedBits(Instruction *I) {
- performAnalysis();
- auto Found = AliveBits.find(I);
- if (Found != AliveBits.end())
- return Found->second;
- const DataLayout &DL = I->getModule()->getDataLayout();
- return APInt::getAllOnes(DL.getTypeSizeInBits(I->getType()->getScalarType()));
- }
- APInt DemandedBits::getDemandedBits(Use *U) {
- Type *T = (*U)->getType();
- Instruction *UserI = cast<Instruction>(U->getUser());
- const DataLayout &DL = UserI->getModule()->getDataLayout();
- unsigned BitWidth = DL.getTypeSizeInBits(T->getScalarType());
- // We only track integer uses, everything else produces a mask with all bits
- // set
- if (!T->isIntOrIntVectorTy())
- return APInt::getAllOnes(BitWidth);
- if (isUseDead(U))
- return APInt(BitWidth, 0);
- performAnalysis();
- APInt AOut = getDemandedBits(UserI);
- APInt AB = APInt::getAllOnes(BitWidth);
- KnownBits Known, Known2;
- bool KnownBitsComputed = false;
- determineLiveOperandBits(UserI, *U, U->getOperandNo(), AOut, AB, Known,
- Known2, KnownBitsComputed);
- return AB;
- }
- bool DemandedBits::isInstructionDead(Instruction *I) {
- performAnalysis();
- return !Visited.count(I) && AliveBits.find(I) == AliveBits.end() &&
- !isAlwaysLive(I);
- }
- bool DemandedBits::isUseDead(Use *U) {
- // We only track integer uses, everything else is assumed live.
- if (!(*U)->getType()->isIntOrIntVectorTy())
- return false;
- // Uses by always-live instructions are never dead.
- Instruction *UserI = cast<Instruction>(U->getUser());
- if (isAlwaysLive(UserI))
- return false;
- performAnalysis();
- if (DeadUses.count(U))
- return true;
- // If no output bits are demanded, no input bits are demanded and the use
- // is dead. These uses might not be explicitly present in the DeadUses map.
- if (UserI->getType()->isIntOrIntVectorTy()) {
- auto Found = AliveBits.find(UserI);
- if (Found != AliveBits.end() && Found->second.isZero())
- return true;
- }
- return false;
- }
- void DemandedBits::print(raw_ostream &OS) {
- auto PrintDB = [&](const Instruction *I, const APInt &A, Value *V = nullptr) {
- OS << "DemandedBits: 0x" << Twine::utohexstr(A.getLimitedValue())
- << " for ";
- if (V) {
- V->printAsOperand(OS, false);
- OS << " in ";
- }
- OS << *I << '\n';
- };
- performAnalysis();
- for (auto &KV : AliveBits) {
- Instruction *I = KV.first;
- PrintDB(I, KV.second);
- for (Use &OI : I->operands()) {
- PrintDB(I, getDemandedBits(&OI), OI);
- }
- }
- }
- static APInt determineLiveOperandBitsAddCarry(unsigned OperandNo,
- const APInt &AOut,
- const KnownBits &LHS,
- const KnownBits &RHS,
- bool CarryZero, bool CarryOne) {
- assert(!(CarryZero && CarryOne) &&
- "Carry can't be zero and one at the same time");
- // The following check should be done by the caller, as it also indicates
- // that LHS and RHS don't need to be computed.
- //
- // if (AOut.isMask())
- // return AOut;
- // Boundary bits' carry out is unaffected by their carry in.
- APInt Bound = (LHS.Zero & RHS.Zero) | (LHS.One & RHS.One);
- // First, the alive carry bits are determined from the alive output bits:
- // Let demand ripple to the right but only up to any set bit in Bound.
- // AOut = -1----
- // Bound = ----1-
- // ACarry&~AOut = --111-
- APInt RBound = Bound.reverseBits();
- APInt RAOut = AOut.reverseBits();
- APInt RProp = RAOut + (RAOut | ~RBound);
- APInt RACarry = RProp ^ ~RBound;
- APInt ACarry = RACarry.reverseBits();
- // Then, the alive input bits are determined from the alive carry bits:
- APInt NeededToMaintainCarryZero;
- APInt NeededToMaintainCarryOne;
- if (OperandNo == 0) {
- NeededToMaintainCarryZero = LHS.Zero | ~RHS.Zero;
- NeededToMaintainCarryOne = LHS.One | ~RHS.One;
- } else {
- NeededToMaintainCarryZero = RHS.Zero | ~LHS.Zero;
- NeededToMaintainCarryOne = RHS.One | ~LHS.One;
- }
- // As in computeForAddCarry
- APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + !CarryZero;
- APInt PossibleSumOne = LHS.One + RHS.One + CarryOne;
- // The below is simplified from
- //
- // APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero);
- // APInt CarryKnownOne = PossibleSumOne ^ LHS.One ^ RHS.One;
- // APInt CarryUnknown = ~(CarryKnownZero | CarryKnownOne);
- //
- // APInt NeededToMaintainCarry =
- // (CarryKnownZero & NeededToMaintainCarryZero) |
- // (CarryKnownOne & NeededToMaintainCarryOne) |
- // CarryUnknown;
- APInt NeededToMaintainCarry = (~PossibleSumZero | NeededToMaintainCarryZero) &
- (PossibleSumOne | NeededToMaintainCarryOne);
- APInt AB = AOut | (ACarry & NeededToMaintainCarry);
- return AB;
- }
- APInt DemandedBits::determineLiveOperandBitsAdd(unsigned OperandNo,
- const APInt &AOut,
- const KnownBits &LHS,
- const KnownBits &RHS) {
- return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, RHS, true,
- false);
- }
- APInt DemandedBits::determineLiveOperandBitsSub(unsigned OperandNo,
- const APInt &AOut,
- const KnownBits &LHS,
- const KnownBits &RHS) {
- KnownBits NRHS;
- NRHS.Zero = RHS.One;
- NRHS.One = RHS.Zero;
- return determineLiveOperandBitsAddCarry(OperandNo, AOut, LHS, NRHS, false,
- true);
- }
- FunctionPass *llvm::createDemandedBitsWrapperPass() {
- return new DemandedBitsWrapperPass();
- }
- AnalysisKey DemandedBitsAnalysis::Key;
- DemandedBits DemandedBitsAnalysis::run(Function &F,
- FunctionAnalysisManager &AM) {
- auto &AC = AM.getResult<AssumptionAnalysis>(F);
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- return DemandedBits(F, AC, DT);
- }
- PreservedAnalyses DemandedBitsPrinterPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- AM.getResult<DemandedBitsAnalysis>(F).print(OS);
- return PreservedAnalyses::all();
- }
|