|
- //===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file implements a function pass that inserts VSETVLI instructions where
- // needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
- // instructions.
- //
- // This pass consists of 3 phases:
- //
- // Phase 1 collects how each basic block affects VL/VTYPE.
- //
- // Phase 2 uses the information from phase 1 to do a data flow analysis to
- // propagate the VL/VTYPE changes through the function. This gives us the
- // VL/VTYPE at the start of each basic block.
- //
- // Phase 3 inserts VSETVLI instructions in each basic block. Information from
- // phase 2 is used to prevent inserting a VSETVLI before the first vector
- // instruction in the block if possible.
- //
- //===----------------------------------------------------------------------===//
- #include "RISCV.h"
- #include "RISCVSubtarget.h"
- #include "llvm/CodeGen/LiveIntervals.h"
- #include "llvm/CodeGen/MachineFunctionPass.h"
- #include <queue>
- using namespace llvm;
- #define DEBUG_TYPE "riscv-insert-vsetvli"
- #define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"
- // Hidden command-line flag, off by default. When set, needVSETVLIPHI()
- // returns true immediately instead of looking through a PHI that defines
- // the AVL register.
- static cl::opt<bool> DisableInsertVSETVLPHIOpt(
- "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
- cl::desc("Disable looking through phis when inserting vsetvlis."));
- // Hidden command-line flag, on by default, gating the stricter assertion
- // checks of the dataflow algorithm.
- // NOTE(review): its uses are not in the visible part of this file; confirm
- // which checks it controls.
- static cl::opt<bool> UseStrictAsserts(
- "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
- cl::desc("Enable strict assertion checking for the dataflow algorithm"));
- namespace {
- /// Operand index of the VL operand of \p MI, as reported by
- /// RISCVII::getVLOpNum for the instruction's descriptor.
- static unsigned getVLOpNum(const MachineInstr &MI) {
-   const auto &Desc = MI.getDesc();
-   return RISCVII::getVLOpNum(Desc);
- }
- /// Operand index of the SEW operand of \p MI, as reported by
- /// RISCVII::getSEWOpNum for the instruction's descriptor.
- static unsigned getSEWOpNum(const MachineInstr &MI) {
-   const auto &Desc = MI.getDesc();
-   return RISCVII::getSEWOpNum(Desc);
- }
- /// Return true if \p MI is one of the three vector configuration pseudos
- /// (PseudoVSETVLI, PseudoVSETVLIX0 or PseudoVSETIVLI).
- static bool isVectorConfigInstr(const MachineInstr &MI) {
-   switch (MI.getOpcode()) {
-   case RISCV::PseudoVSETVLI:
-   case RISCV::PseudoVSETVLIX0:
-   case RISCV::PseudoVSETIVLI:
-     return true;
-   default:
-     return false;
-   }
- }
- /// Recognize the 'vsetvli x0, x0, vtype' form, which leaves VL untouched and
- /// only rewrites VTYPE. Only PseudoVSETVLIX0 can qualify; its AVL operand
- /// (operand 1) is asserted to be X0, so the answer depends solely on whether
- /// the destination (operand 0) is X0 as well.
- static bool isVLPreservingConfig(const MachineInstr &MI) {
-   const bool IsX0Pseudo = MI.getOpcode() == RISCV::PseudoVSETVLIX0;
-   if (!IsX0Pseudo)
-     return false;
-   assert(RISCV::X0 == MI.getOperand(1).getReg());
-   const bool DestIsX0 = RISCV::X0 == MI.getOperand(0).getReg();
-   return DestIsX0;
- }
- /// Translate an RVV pseudo opcode to the BaseInstr recorded for it in
- /// RISCVVPseudosTable. Returns 0 when the table has no entry for
- /// \p RVVPseudoOpcode.
- static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) {
-   if (const RISCVVPseudosTable::PseudoInfo *Entry =
-           RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode))
-     return Entry->BaseInstr;
-   return 0;
- }
- /// Return true if the base instruction behind \p MI is vmv.s.x or vfmv.s.f.
- static bool isScalarMoveInstr(const MachineInstr &MI) {
-   const uint16_t BaseOpcode = getRVVMCOpcode(MI.getOpcode());
-   return BaseOpcode == RISCV::VMV_S_X || BaseOpcode == RISCV::VFMV_S_F;
- }
- /// For a load or store from the VLE/VLSE/VSE/VSSE families, whose element
- /// width is encoded in the opcode, return that width in bits (8, 16, 32 or
- /// 64). Every other instruction yields std::nullopt.
- static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
-   unsigned EEW = 0;
-   switch (getRVVMCOpcode(MI.getOpcode())) {
-   case RISCV::VLE8_V:
-   case RISCV::VLSE8_V:
-   case RISCV::VSE8_V:
-   case RISCV::VSSE8_V:
-     EEW = 8;
-     break;
-   case RISCV::VLE16_V:
-   case RISCV::VLSE16_V:
-   case RISCV::VSE16_V:
-   case RISCV::VSSE16_V:
-     EEW = 16;
-     break;
-   case RISCV::VLE32_V:
-   case RISCV::VLSE32_V:
-   case RISCV::VSE32_V:
-   case RISCV::VSSE32_V:
-     EEW = 32;
-     break;
-   case RISCV::VLE64_V:
-   case RISCV::VLSE64_V:
-   case RISCV::VSE64_V:
-   case RISCV::VSSE64_V:
-     EEW = 64;
-     break;
-   default:
-     // Not a load/store with an opcode-encoded element width.
-     return std::nullopt;
-   }
-   return EEW;
- }
- /// Return true if \p MI operates on mask registers. This covers both the
- /// arithmetic/logical mask ops and the mask load/store (vlm/vsm).
- /// Instructions without a SEW operand are never mask register ops.
- static bool isMaskRegOp(const MachineInstr &MI) {
-   const uint64_t TSFlags = MI.getDesc().TSFlags;
-   if (RISCVII::hasSEWOp(TSFlags)) {
-     // A Log2SEW of 0 is an operation on mask registers only.
-     const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
-     return Log2SEW == 0;
-   }
-   return false;
- }
- /// Which subfields of VL or VTYPE have values we need to preserve?
- ///
- /// Every flag defaults to false, i.e. nothing is demanded. A flag set to true
- /// means the corresponding property is observed and so must be preserved.
- struct DemandedFields {
-   // Some unknown property of VL is used. If demanded, must preserve entire
-   // value.
-   bool VLAny = false;
-   // Only zero vs non-zero is used. If demanded, can change non-zero values.
-   bool VLZeroness = false;
-   // The individual VTYPE subfields / derived properties.
-   bool SEW = false;
-   bool LMUL = false;
-   bool SEWLMULRatio = false;
-   bool TailPolicy = false;
-   bool MaskPolicy = false;
-
-   // Return true if any part of VTYPE was used
-   bool usedVTYPE() const {
-     return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
-   }
-
-   // Return true if any property of VL was used. This is a pure query, so it
-   // is const like usedVTYPE() and can be called through a const reference.
-   bool usedVL() const {
-     return VLAny || VLZeroness;
-   }
-
-   // Mark all VTYPE subfields and properties as demanded
-   void demandVTYPE() {
-     SEW = true;
-     LMUL = true;
-     SEWLMULRatio = true;
-     TailPolicy = true;
-     MaskPolicy = true;
-   }
-
-   // Mark all VL properties as demanded
-   void demandVL() {
-     VLAny = true;
-     VLZeroness = true;
-   }
-
- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-   /// Support for debugging, callable in GDB: V->dump()
-   LLVM_DUMP_METHOD void dump() const {
-     print(dbgs());
-     dbgs() << "\n";
-   }
-
-   /// Implement operator<<. Writes every flag as "Name=0/1" inside braces.
-   void print(raw_ostream &OS) const {
-     OS << "{";
-     OS << "VLAny=" << VLAny << ", ";
-     OS << "VLZeroness=" << VLZeroness << ", ";
-     OS << "SEW=" << SEW << ", ";
-     OS << "LMUL=" << LMUL << ", ";
-     OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
-     OS << "TailPolicy=" << TailPolicy << ", ";
-     OS << "MaskPolicy=" << MaskPolicy;
-     OS << "}";
-   }
- #endif
- };
- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- // Stream insertion for DemandedFields: forwards to DemandedFields::print and
- // returns the stream so insertions can be chained. Only compiled when
- // assertions or LLVM_ENABLE_DUMP are enabled.
- LLVM_ATTRIBUTE_USED
- inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
- DF.print(OS);
- return OS;
- }
- #endif
- /// Decide whether two VTYPE encodings are interchangeable for a consumer
- /// (an instruction or set of instructions) that only observes the subfields
- /// and properties flagged in \p Used. Each demanded property is checked in
- /// turn; a property that is not demanded is neither decoded nor compared.
- static bool areCompatibleVTYPEs(uint64_t VType1,
-                                 uint64_t VType2,
-                                 const DemandedFields &Used) {
-   if (Used.SEW) {
-     const bool SameSEW =
-         RISCVVType::getSEW(VType1) == RISCVVType::getSEW(VType2);
-     if (!SameSEW)
-       return false;
-   }
-
-   if (Used.LMUL) {
-     const bool SameLMUL =
-         RISCVVType::getVLMUL(VType1) == RISCVVType::getVLMUL(VType2);
-     if (!SameLMUL)
-       return false;
-   }
-
-   if (Used.SEWLMULRatio) {
-     // SEW/LMUL ratio of a single encoded VTYPE value.
-     auto RatioOf = [](uint64_t VType) {
-       return RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType),
-                                          RISCVVType::getVLMUL(VType));
-     };
-     if (RatioOf(VType1) != RatioOf(VType2))
-       return false;
-   }
-
-   if (Used.TailPolicy) {
-     const bool SameTail = RISCVVType::isTailAgnostic(VType1) ==
-                           RISCVVType::isTailAgnostic(VType2);
-     if (!SameTail)
-       return false;
-   }
-
-   if (Used.MaskPolicy) {
-     const bool SameMask = RISCVVType::isMaskAgnostic(VType1) ==
-                           RISCVVType::isMaskAgnostic(VType2);
-     if (!SameMask)
-       return false;
-   }
-
-   // Every demanded property agrees.
-   return true;
- }
- /// Compute which VL/VTYPE fields and properties \p MI depends on.
- ///
- /// The result starts empty, is widened conservatively, and is then narrowed
- /// by a series of instruction-class specific relaxations. The order of the
- /// steps matters: later steps clear flags that earlier steps set.
- static DemandedFields getDemanded(const MachineInstr &MI) {
-   // Warning: This function has to work on both the lowered (i.e. post
-   // emitVSETVLIs) and pre-lowering forms. The main implication of this is
-   // that it can't use the value of a SEW, VL, or Policy operand as they might
-   // be stale after lowering.
-   const uint64_t TSFlags = MI.getDesc().TSFlags;
-   const bool HasSEWOp = RISCVII::hasSEWOp(TSFlags);
-
-   // Most instructions don't use any of these subfields.
-   DemandedFields Res;
-
-   // Start conservative if registers are used. Calls and inline asm are
-   // treated as if they read both VL and VTYPE.
-   const bool IsCallOrAsm = MI.isCall() || MI.isInlineAsm();
-   if (IsCallOrAsm || MI.readsRegister(RISCV::VL))
-     Res.demandVL();
-   if (IsCallOrAsm || MI.readsRegister(RISCV::VTYPE))
-     Res.demandVTYPE();
-
-   // Start conservative on the unlowered form too.
-   if (HasSEWOp) {
-     Res.demandVTYPE();
-     if (RISCVII::hasVLOp(TSFlags))
-       Res.demandVL();
-     // Behavior is independent of mask policy.
-     if (!RISCVII::usesMaskPolicy(TSFlags))
-       Res.MaskPolicy = false;
-   }
-
-   // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
-   // They instead demand the ratio of the two which is used in computing
-   // EMUL, but which allows us the flexibility to change SEW and LMUL
-   // provided we don't change the ratio.
-   // Note: We assume that the instruction's initial SEW is the EEW encoded
-   // in the opcode. This is asserted when constructing the VSETVLIInfo.
-   if (getEEWForLoadStore(MI)) {
-     Res.SEW = false;
-     Res.LMUL = false;
-   }
-
-   // Store instructions don't use the policy fields.
-   if (HasSEWOp && MI.getNumExplicitDefs() == 0) {
-     Res.TailPolicy = false;
-     Res.MaskPolicy = false;
-   }
-
-   // If this is a mask reg operation, it only cares about VLMAX.
-   // TODO: Possible extensions to this logic
-   // * Probably ok if available VLMax is larger than demanded
-   // * The policy bits can probably be ignored..
-   if (isMaskRegOp(MI)) {
-     Res.SEW = false;
-     Res.LMUL = false;
-   }
-
-   // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
-   if (isScalarMoveInstr(MI)) {
-     Res.LMUL = false;
-     Res.SEWLMULRatio = false;
-     Res.VLAny = false;
-   }
-
-   return Res;
- }
- /// Defines the abstract state with which the forward dataflow models the
- /// values of the VL and VTYPE registers after insertion.
- ///
- /// A value is in one of four states (see State below): Uninitialized, an AVL
- /// held in a register, an AVL held as an immediate, or Unknown. The VTYPE
- /// fields are only meaningful in the two AVL states; when SEWLMULRatioOnly is
- /// set, only the SEW/LMUL ratio derived from them is meaningful.
- class VSETVLIInfo {
- // The AVL. Which member is active is selected by State (AVLIsReg/AVLIsImm).
- union {
- Register AVLReg;
- unsigned AVLImm;
- };
- // Discriminator for the union above plus the two states without an AVL.
- enum : uint8_t {
- Uninitialized,
- AVLIsReg,
- AVLIsImm,
- Unknown,
- } State = Uninitialized;
- // Fields from VTYPE.
- RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
- uint8_t SEW = 0;
- uint8_t TailAgnostic : 1;
- uint8_t MaskAgnostic : 1;
- // Set by intersect() when two states agree on AVL and VLMAX but not on the
- // full VTYPE.
- uint8_t SEWLMULRatioOnly : 1;
- public:
- // Constructs an Uninitialized value; the bitfields are cleared here because
- // bitfields cannot take default member initializers before C++20.
- VSETVLIInfo()
- : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
- SEWLMULRatioOnly(false) {}
- // Convenience factory for a value in the Unknown state.
- static VSETVLIInfo getUnknown() {
- VSETVLIInfo Info;
- Info.setUnknown();
- return Info;
- }
- // "Valid" means anything other than Uninitialized; note that Unknown counts
- // as valid.
- bool isValid() const { return State != Uninitialized; }
- void setUnknown() { State = Unknown; }
- bool isUnknown() const { return State == Unknown; }
- // Setting an AVL also moves the value into the matching AVL state.
- void setAVLReg(Register Reg) {
- AVLReg = Reg;
- State = AVLIsReg;
- }
- void setAVLImm(unsigned Imm) {
- AVLImm = Imm;
- State = AVLIsImm;
- }
- bool hasAVLImm() const { return State == AVLIsImm; }
- bool hasAVLReg() const { return State == AVLIsReg; }
- // The AVL getters assert that the value is in the corresponding state.
- Register getAVLReg() const {
- assert(hasAVLReg());
- return AVLReg;
- }
- unsigned getAVLImm() const {
- assert(hasAVLImm());
- return AVLImm;
- }
- unsigned getSEW() const { return SEW; }
- RISCVII::VLMUL getVLMUL() const { return VLMul; }
- // Return true only when the AVL is provably non-zero: a positive immediate,
- // or the X0 register. Any other register, and the Uninitialized/Unknown
- // states, yield false.
- bool hasNonZeroAVL() const {
- if (hasAVLImm())
- return getAVLImm() > 0;
- if (hasAVLReg())
- return getAVLReg() == RISCV::X0;
- return false;
- }
- // Return true if both AVLs are known to agree on being zero vs non-zero:
- // either they are the same AVL, or both are provably non-zero.
- bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
- if (hasSameAVL(Other))
- return true;
- return (hasNonZeroAVL() && Other.hasNonZeroAVL());
- }
- // Return true if both values hold the same register AVL or the same
- // immediate AVL. Mixed kinds and states without an AVL compare unequal.
- bool hasSameAVL(const VSETVLIInfo &Other) const {
- if (hasAVLReg() && Other.hasAVLReg())
- return getAVLReg() == Other.getAVLReg();
- if (hasAVLImm() && Other.hasAVLImm())
- return getAVLImm() == Other.getAVLImm();
- return false;
- }
- // Populate the VTYPE fields from an encoded VTYPE value. An AVL must have
- // been set first (asserted).
- void setVTYPE(unsigned VType) {
- assert(isValid() && !isUnknown() &&
- "Can't set VTYPE for uninitialized or unknown");
- VLMul = RISCVVType::getVLMUL(VType);
- SEW = RISCVVType::getSEW(VType);
- TailAgnostic = RISCVVType::isTailAgnostic(VType);
- MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
- }
- // Populate the VTYPE fields from already-decoded components.
- void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
- assert(isValid() && !isUnknown() &&
- "Can't set VTYPE for uninitialized or unknown");
- VLMul = L;
- SEW = S;
- TailAgnostic = TA;
- MaskAgnostic = MA;
- }
- // Encode the VTYPE fields. Asserts that the full VTYPE is meaningful, i.e.
- // the value is in an AVL state and is not ratio-only.
- unsigned encodeVTYPE() const {
- assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
- "Can't encode VTYPE for uninitialized or unknown");
- return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
- }
- bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
- // Exact comparison of all four VTYPE fields. Both values must be in an AVL
- // state and neither may be ratio-only (asserted).
- bool hasSameVTYPE(const VSETVLIInfo &Other) const {
- assert(isValid() && Other.isValid() &&
- "Can't compare invalid VSETVLIInfos");
- assert(!isUnknown() && !Other.isUnknown() &&
- "Can't compare VTYPE in unknown state");
- assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
- "Can't compare when only LMUL/SEW ratio is valid.");
- return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
- std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
- Other.MaskAgnostic);
- }
- unsigned getSEWLMULRatio() const {
- assert(isValid() && !isUnknown() &&
- "Can't use VTYPE for uninitialized or unknown");
- return RISCVVType::getSEWLMULRatio(SEW, VLMul);
- }
- // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
- // Note that having the same VLMAX ensures that both share the same
- // function from AVL to VL; that is, they must produce the same VL value
- // for any given AVL value.
- bool hasSameVLMAX(const VSETVLIInfo &Other) const {
- assert(isValid() && Other.isValid() &&
- "Can't compare invalid VSETVLIInfos");
- assert(!isUnknown() && !Other.isUnknown() &&
- "Can't compare VTYPE in unknown state");
- return getSEWLMULRatio() == Other.getSEWLMULRatio();
- }
- // Compare only the VTYPE of this value against Require, restricted to the
- // fields in Used. The AVL is not considered.
- bool hasCompatibleVTYPE(const DemandedFields &Used,
- const VSETVLIInfo &Require) const {
- return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
- }
- // Determine whether the vector instruction requirements represented by
- // Require are compatible with the previous vsetvli instruction represented
- // by this. Used lists the VL/VTYPE fields the instruction under
- // consideration actually depends on; only those have to agree.
- bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require) const {
- assert(isValid() && Require.isValid() &&
- "Can't compare invalid VSETVLIInfos");
- assert(!Require.SEWLMULRatioOnly &&
- "Expected a valid VTYPE for instruction!");
- // Nothing is compatible with Unknown.
- if (isUnknown() || Require.isUnknown())
- return false;
- // If only our VLMAX ratio is valid, then this isn't compatible.
- if (SEWLMULRatioOnly)
- return false;
- // If the instruction doesn't need an AVLReg and the SEW matches, consider
- // it compatible.
- if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
- if (SEW == Require.SEW)
- return true;
- if (Used.VLAny && !hasSameAVL(Require))
- return false;
- if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
- return false;
- return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
- }
- // Equality as used by the dataflow fixed-point test: states must match, and
- // for AVL states the AVL, the ratio-only bit, and then either the VLMAX
- // (ratio-only) or the full VTYPE must match.
- bool operator==(const VSETVLIInfo &Other) const {
- // Uninitialized is only equal to another Uninitialized.
- if (!isValid())
- return !Other.isValid();
- if (!Other.isValid())
- return !isValid();
- // Unknown is only equal to another Unknown.
- if (isUnknown())
- return Other.isUnknown();
- if (Other.isUnknown())
- return isUnknown();
- if (!hasSameAVL(Other))
- return false;
- // If the SEWLMULRatioOnly bits are different, then they aren't equal.
- if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
- return false;
- // If only the VLMAX is valid, check that it is the same.
- if (SEWLMULRatioOnly)
- return hasSameVLMAX(Other);
- // If the full VTYPE is valid, check that it is the same.
- return hasSameVTYPE(Other);
- }
- bool operator!=(const VSETVLIInfo &Other) const {
- return !(*this == Other);
- }
- // Calculate the VSETVLIInfo visible to a block assuming this and Other are
- // both predecessors.
- VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
- // If the new value isn't valid, ignore it.
- if (!Other.isValid())
- return *this;
- // If this value isn't valid, this must be the first predecessor, use it.
- if (!isValid())
- return Other;
- // If either is unknown, the result is unknown.
- if (isUnknown() || Other.isUnknown())
- return VSETVLIInfo::getUnknown();
- // If we have an exact match, return this.
- if (*this == Other)
- return *this;
- // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
- // return an SEW/LMUL ratio only value.
- if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
- VSETVLIInfo MergeInfo = *this;
- MergeInfo.SEWLMULRatioOnly = true;
- return MergeInfo;
- }
- // Otherwise the result is unknown.
- return VSETVLIInfo::getUnknown();
- }
- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Support for debugging, callable in GDB: V->dump()
- LLVM_DUMP_METHOD void dump() const {
- print(dbgs());
- dbgs() << "\n";
- }
- /// Implement operator<<.
- /// @{
- // Prints the state (or the AVL for the AVL states) followed by the raw VTYPE
- // fields; the VTYPE fields are printed regardless of state.
- void print(raw_ostream &OS) const {
- OS << "{";
- if (!isValid())
- OS << "Uninitialized";
- if (isUnknown())
- OS << "unknown";
- if (hasAVLReg())
- OS << "AVLReg=" << (unsigned)AVLReg;
- if (hasAVLImm())
- OS << "AVLImm=" << (unsigned)AVLImm;
- OS << ", "
- << "VLMul=" << (unsigned)VLMul << ", "
- << "SEW=" << (unsigned)SEW << ", "
- << "TailAgnostic=" << (bool)TailAgnostic << ", "
- << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
- << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
- }
- #endif
- };
- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- // Stream insertion for VSETVLIInfo: forwards to VSETVLIInfo::print and
- // returns the stream so insertions can be chained. Only compiled when
- // assertions or LLVM_ENABLE_DUMP are enabled.
- LLVM_ATTRIBUTE_USED
- inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
- V.print(OS);
- return OS;
- }
- #endif
- // Per-basic-block bookkeeping for the dataflow. The pass keeps one entry per
- // block, indexed by the block's number. All three states start out
- // Uninitialized.
- struct BlockData {
- // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
- // made by this block. Calculated in Phase 1.
- VSETVLIInfo Change;
- // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
- // block. Calculated in Phase 2.
- VSETVLIInfo Exit;
- // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
- // blocks. Calculated in Phase 2, and used by Phase 3.
- VSETVLIInfo Pred;
- // Keeps track of whether the block is already in the queue.
- bool InQueue = false;
- BlockData() = default;
- };
- /// Machine function pass that inserts the vector configuration instructions
- /// required by the vector pseudos of a function (see the file header for the
- /// phases).
- class RISCVInsertVSETVLI : public MachineFunctionPass {
-   // Assigned outside this class declaration before the helpers below run.
-   // They start out null so that a premature use dereferences a null pointer
-   // rather than an indeterminate one.
-   const TargetInstrInfo *TII = nullptr;
-   MachineRegisterInfo *MRI = nullptr;
-
-   // Dataflow state, one entry per basic block, indexed by block number.
-   std::vector<BlockData> BlockInfo;
-   // Blocks whose incoming state still has to be (re)computed.
-   std::queue<const MachineBasicBlock *> WorkList;
-
- public:
-   static char ID;
-
-   RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
-     initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
-   }
-
-   bool runOnMachineFunction(MachineFunction &MF) override;
-
-   // The pass declares that it preserves the CFG.
-   void getAnalysisUsage(AnalysisUsage &AU) const override {
-     AU.setPreservesCFG();
-     MachineFunctionPass::getAnalysisUsage(AU);
-   }
-
-   StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
-
- private:
-   // Queries: is a vsetvli needed before MI / given a PHI-defined AVL?
-   bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
-                    const VSETVLIInfo &CurInfo) const;
-   bool needVSETVLIPHI(const VSETVLIInfo &Require,
-                       const MachineBasicBlock &MBB) const;
-
-   // Emit a vector configuration instruction establishing Info, either right
-   // before MI or at an explicit insertion point.
-   void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
-                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
-   void insertVSETVLI(MachineBasicBlock &MBB,
-                      MachineBasicBlock::iterator InsertPt, DebugLoc DL,
-                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
-
-   // Transfer functions of the dataflow: the state MI is evaluated with, and
-   // the state after MI has executed.
-   void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
-   void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
-
-   // Per-block drivers of the phases.
-   bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
-   void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
-   void emitVSETVLIs(MachineBasicBlock &MBB);
-   void doLocalPostpass(MachineBasicBlock &MBB);
-   void doPRE(MachineBasicBlock &MBB);
-   void insertReadVL(MachineBasicBlock &MBB);
- };
- } // end anonymous namespace
- // Definition of the static ID member that the constructor passes to
- // MachineFunctionPass, followed by the pass registration under DEBUG_TYPE
- // with the human-readable name RISCV_INSERT_VSETVLI_NAME.
- char RISCVInsertVSETVLI::ID = 0;
- INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
- false, false)
- /// Build the VSETVLIInfo (AVL plus VTYPE) that \p MI requests.
- ///
- /// \param MI      instruction to inspect; it is read for its SEW operand.
- /// \param TSFlags the TSFlags of MI's descriptor.
- /// \param MRI     used to find the definition of a tied source operand.
- /// \returns a state holding an AVL (register or immediate) and a full VTYPE.
- static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
-                                        const MachineRegisterInfo *MRI) {
-   // Agnostic policies are the default; they are only tightened when the
-   // destination is tied to a source operand.
-   bool TailAgnostic = true;
-   bool MaskAgnostic = true;
-   unsigned UseOpIdx;
-   if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
-     // If there is no tied operand, there shouldn't be a policy operand.
-     assert(!RISCVII::hasVecPolicyOp(TSFlags) && "Unexpected policy operand");
-   } else {
-     // Start with undisturbed.
-     TailAgnostic = false;
-     MaskAgnostic = false;
-
-     // If there is a policy operand (the last explicit operand), use it.
-     if (RISCVII::hasVecPolicyOp(TSFlags)) {
-       const uint64_t Policy =
-           MI.getOperand(MI.getNumExplicitOperands() - 1).getImm();
-       assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
-              "Invalid Policy Value");
-       TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
-       MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
-     }
-
-     // If the tied operand is an IMPLICIT_DEF we can use TailAgnostic and
-     // MaskAgnostic.
-     const MachineInstr *TiedDef =
-         MRI->getVRegDef(MI.getOperand(UseOpIdx).getReg());
-     if (TiedDef && TiedDef->isImplicitDef()) {
-       TailAgnostic = true;
-       MaskAgnostic = true;
-     }
-
-     // Some pseudo instructions force a tail agnostic policy despite having a
-     // tied def.
-     if (RISCVII::doesForceTailAgnostic(TSFlags))
-       TailAgnostic = true;
-
-     if (!RISCVII::usesMaskPolicy(TSFlags))
-       MaskAgnostic = true;
-   }
-
-   const RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
-
-   const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
-   // A Log2SEW of 0 is an operation on mask registers only.
-   const unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
-   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
-
-   // The AVL has to be set before the VTYPE: setVTYPE asserts on a state
-   // that holds no AVL.
-   VSETVLIInfo InstrInfo;
-   if (!RISCVII::hasVLOp(TSFlags)) {
-     // No VL operand: record that with the NoRegister AVL.
-     InstrInfo.setAVLReg(RISCV::NoRegister);
-   } else {
-     const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
-     if (!VLOp.isImm()) {
-       InstrInfo.setAVLReg(VLOp.getReg());
-     } else {
-       const int64_t Imm = VLOp.getImm();
-       // Convert the VLMax sentinel to the X0 register.
-       if (Imm == RISCV::VLMaxSentinel)
-         InstrInfo.setAVLReg(RISCV::X0);
-       else
-         InstrInfo.setAVLImm(Imm);
-     }
-   }
-
- #ifndef NDEBUG
-   if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
-     assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
-   }
- #endif
-
-   InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
-   return InstrInfo;
- }
- /// Convenience overload: insert the configuration instruction immediately
- /// before \p MI, reusing MI's debug location.
- void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
-                                        const VSETVLIInfo &Info,
-                                        const VSETVLIInfo &PrevInfo) {
-   insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), MI.getDebugLoc(), Info,
-                 PrevInfo);
- }
- /// Insert, before \p InsertPt, the vector configuration instruction that
- /// establishes \p Info, choosing the cheapest form that is valid given the
- /// previously established state \p PrevInfo.
- void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
-                      MachineBasicBlock::iterator InsertPt, DebugLoc DL,
-                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
-   // Emits the 'x0, x0' form, which only changes VTYPE and carries an
-   // implicit use of VL.
-   auto EmitVLPreserving = [&]() {
-     BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
-         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
-         .addReg(RISCV::X0, RegState::Kill)
-         .addImm(Info.encodeVTYPE())
-         .addReg(RISCV::VL, RegState::Implicit);
-   };
-
-   // The previous state can only be relied upon if it holds an AVL.
-   const bool PrevIsKnown = PrevInfo.isValid() && !PrevInfo.isUnknown();
-
-   // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
-   // VLMAX.
-   if (PrevIsKnown && Info.hasSameAVL(PrevInfo) &&
-       Info.hasSameVLMAX(PrevInfo)) {
-     EmitVLPreserving();
-     return;
-   }
-
-   if (Info.hasAVLImm()) {
-     BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
-         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
-         .addImm(Info.getAVLImm())
-         .addImm(Info.encodeVTYPE());
-     return;
-   }
-
-   const Register AVLReg = Info.getAVLReg();
-   if (AVLReg == RISCV::NoRegister) {
-     // We can only use x0, x0 if there's no chance of the vtype change causing
-     // the previous vl to become invalid.
-     if (PrevIsKnown && Info.hasSameVLMAX(PrevInfo)) {
-       EmitVLPreserving();
-       return;
-     }
-     // Otherwise use an AVL of 0 to avoid depending on previous vl.
-     BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
-         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
-         .addImm(0)
-         .addImm(Info.encodeVTYPE());
-     return;
-   }
-
-   if (AVLReg.isVirtual())
-     MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
-
-   // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
-   // opcode if the AVLReg is X0 as they have different register classes for
-   // the AVL operand.
-   const bool AVLIsX0 = AVLReg == RISCV::X0;
-   const unsigned Opcode =
-       AVLIsX0 ? RISCV::PseudoVSETVLIX0 : RISCV::PseudoVSETVLI;
-   Register DestReg = RISCV::X0;
-   if (AVLIsX0)
-     DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
-
-   BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
-       .addReg(DestReg, RegState::Define | RegState::Dead)
-       .addReg(AVLReg)
-       .addImm(Info.encodeVTYPE());
- }
- // Decode an explicit vector configuration instruction into the VSETVLIInfo
- // it establishes: operand 1 supplies the AVL (an immediate for
- // PseudoVSETIVLI, a register otherwise) and operand 2 the encoded VTYPE.
- static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
-   VSETVLIInfo NewInfo;
-   const MachineOperand &AVLOp = MI.getOperand(1);
-   if (MI.getOpcode() != RISCV::PseudoVSETIVLI) {
-     assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
-            MI.getOpcode() == RISCV::PseudoVSETVLIX0);
-     const Register AVLReg = AVLOp.getReg();
-     // The VL-preserving 'x0, x0' form has no AVL that could be recorded.
-     assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
-            "Can't handle X0, X0 vsetvli yet");
-     NewInfo.setAVLReg(AVLReg);
-   } else {
-     NewInfo.setAVLImm(AVLOp.getImm());
-   }
-   NewInfo.setVTYPE(MI.getOperand(2).getImm());
-   return NewInfo;
- }
- /// Decide whether a vector configuration instruction has to be placed
- /// before \p MI to get from the current state \p CurInfo to the state
- /// \p Require that MI requests. \p Require must be the result of
- /// computeInfoForInstr for MI (asserted).
- bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
-                                      const VSETVLIInfo &Require,
-                                      const VSETVLIInfo &CurInfo) const {
-   assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));
-
-   // Without a fully known current state nothing can be reused.
-   if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
-     return true;
-
-   DemandedFields Used = getDemanded(MI);
-
-   if (isScalarMoveInstr(MI)) {
-     // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we don't
-     // need to preserve any other bits and are thus compatible with any larger
-     // etype, and can disregard policy bits. Warning: It's tempting to try doing
-     // this for any tail agnostic operation, but we can't as TA requires
-     // tail lanes to either be the original value or -1. We are writing
-     // unknown bits to the lanes here.
-     const MachineInstr *PassthruDef =
-         MRI->getVRegDef(MI.getOperand(1).getReg());
-     if (PassthruDef && PassthruDef->isImplicitDef() &&
-         CurInfo.getSEW() >= Require.getSEW()) {
-       Used.SEW = false;
-       Used.TailPolicy = false;
-     }
-   }
-
-   if (CurInfo.isCompatible(Used, Require))
-     return false;
-
-   // We didn't find a compatible value. If our AVL is a virtual register,
-   // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
-   // and the last VL/VTYPE we observed is the same, we don't need a
-   // VSETVLI here.
-   if (!Require.hasAVLReg() || !Require.getAVLReg().isVirtual())
-     return true;
-   if (!CurInfo.hasCompatibleVTYPE(Used, Require))
-     return true;
-
-   MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg());
-   if (!DefMI || !isVectorConfigInstr(*DefMI))
-     return true;
-
-   const VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
-   const bool DefMatchesCurrent =
-       DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo);
-   return !DefMatchesCurrent;
- }
- // Given the state reaching MI, update it in place to a state that is
- // minimally compatible with MI. The resulting state is guaranteed to be
- // semantically legal for MI, but it is not necessarily the exact state MI
- // requested. Instructions without a SEW operand leave the state untouched.
- void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) {
-   const uint64_t TSFlags = MI.getDesc().TSFlags;
-   if (!RISCVII::hasSEWOp(TSFlags))
-     return;
-
-   const VSETVLIInfo Requested = computeInfoForInstr(MI, TSFlags, MRI);
-   // The incoming state already satisfies MI: keep it.
-   if (Info.isValid() && !needVSETVLI(MI, Requested, Info))
-     return;
-
-   const VSETVLIInfo PrevInfo = Info;
-   Info = Requested;
-
-   // The AVL adjustments below only apply to instructions with a VL operand.
-   if (!RISCVII::hasVLOp(TSFlags))
-     return;
-
-   // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
-   // VL > 0. We can discard the user requested AVL and just use the last
-   // one if we can prove it equally zero. This removes a vsetvli entirely
-   // if the types match or allows use of cheaper avl preserving variant
-   // if VLMAX doesn't change. If VLMAX might change, we couldn't use
-   // the 'vsetvli x0, x0, vtype" variant, so we avoid the transform to
-   // prevent extending live range of an avl register operand.
-   // TODO: We can probably relax this for immediates.
-   const bool CanReusePrevAVL = isScalarMoveInstr(MI) && PrevInfo.isValid() &&
-                                PrevInfo.hasEquallyZeroAVL(Info) &&
-                                Info.hasSameVLMAX(PrevInfo);
-   if (CanReusePrevAVL) {
-     if (PrevInfo.hasAVLImm())
-       Info.setAVLImm(PrevInfo.getAVLImm());
-     else
-       Info.setAVLReg(PrevInfo.getAVLReg());
-     return;
-   }
-
-   // If AVL is defined by a vsetvli with the same VLMAX, we can
-   // replace the AVL operand with the AVL of the defining vsetvli.
-   // We avoid general register AVLs to avoid extending live ranges
-   // without being sure we can kill the original source reg entirely.
-   if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
-     return;
-
-   MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
-   if (!DefMI || !isVectorConfigInstr(*DefMI))
-     return;
-
-   const VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
-   if (!DefInfo.hasSameVLMAX(Info))
-     return;
-
-   // Only an immediate AVL or the X0 register is forwarded.
-   if (DefInfo.hasAVLImm())
-     Info.setAVLImm(DefInfo.getAVLImm());
-   else if (DefInfo.getAVLReg() == RISCV::X0)
-     Info.setAVLReg(DefInfo.getAVLReg());
- }
- // Given the state MI was evaluated with (which, because of transferBefore,
- // may differ from the state MI requested), update it in place to reflect
- // what MI itself does to VL/VTYPE.
- void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) {
-   // An explicit configuration instruction fully determines the new state.
-   if (isVectorConfigInstr(MI)) {
-     Info = getInfoForVSETVLI(MI);
-     return;
-   }
-
-   if (RISCV::isFaultFirstLoad(MI)) {
-     // Update AVL to vl-output of the fault first load.
-     const Register VLOutput = MI.getOperand(1).getReg();
-     Info.setAVLReg(VLOutput);
-     return;
-   }
-
-   // If this is something that updates VL/VTYPE that we don't know about, set
-   // the state to unknown.
-   const bool ClobbersVectorConfig =
-       MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
-       MI.modifiesRegister(RISCV::VTYPE);
-   if (ClobbersVectorConfig)
-     Info = VSETVLIInfo::getUnknown();
- }
- /// Recompute the Change state of \p MBB by running both transfer functions
- /// over every instruction, starting from the block's Pred state.
- /// \returns true if the block contains a vector configuration instruction
- /// or an instruction with a SEW operand.
- bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
-   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
-   VSETVLIInfo &State = BBInfo.Change;
-   State = BBInfo.Pred;
-
-   bool HadVectorOp = false;
-   for (const MachineInstr &MI : MBB) {
-     transferBefore(State, MI);
-     HadVectorOp |=
-         isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags);
-     transferAfter(State, MI);
-   }
-   return HadVectorOp;
- }
- /// One step of the work-list dataflow: merge the exit states of \p MBB's
- /// predecessors into its entry state and, if that changed, recompute the
- /// block's exit state and queue its successors when the exit state changed
- /// as well.
- void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
-   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
-   // Clear the flag so that the block can be queued again later.
-   BBInfo.InQueue = false;
-
-   // Start with the previous entry so that we keep the most conservative state
-   // we have ever found.
-   VSETVLIInfo InInfo = BBInfo.Pred;
-   if (!MBB.pred_empty()) {
-     for (MachineBasicBlock *PredMBB : MBB.predecessors())
-       InInfo = InInfo.intersect(BlockInfo[PredMBB->getNumber()].Exit);
-   } else {
-     // There are no predecessors, so use the default starting status.
-     InInfo.setUnknown();
-   }
-
-   // If we don't have any valid predecessor value, wait until we do. If there
-   // is no change, there is no need to rerun the block either.
-   if (!InInfo.isValid() || InInfo == BBInfo.Pred)
-     return;
-
-   BBInfo.Pred = InInfo;
-   LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
-                     << " changed to " << BBInfo.Pred << "\n");
-
-   // Note: It's tempting to cache the state changes here, but due to the
-   // compatibility checks performed a block's output state can change based on
-   // the input state. To cache, we'd have to add logic for finding
-   // never-compatible state changes.
-   computeVLVTYPEChanges(MBB);
-
-   // If the new exit value matches the old exit value, we don't need to revisit
-   // any blocks.
-   if (BBInfo.Exit == BBInfo.Change)
-     return;
-
-   BBInfo.Exit = BBInfo.Change;
-   LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
-                     << " changed to " << BBInfo.Exit << "\n");
-
-   // Add the successors to the work list so we can propagate the changed exit
-   // status. Blocks that are already queued are not added twice.
-   for (MachineBasicBlock *Succ : MBB.successors()) {
-     BlockData &SuccInfo = BlockInfo[Succ->getNumber()];
-     if (SuccInfo.InQueue)
-       continue;
-     SuccInfo.InQueue = true;
-     WorkList.push(Succ);
-   }
- }
- // Second-chance check used when a vsetvli could not be proven unnecessary
- // directly: it is still unnecessary if the AVL is a PHI in this block and
- // every incoming value is the VL output of the last VSET(I)VLI of its
- // predecessor block. Returns true when a vsetvli is (conservatively) needed.
- bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
-                                         const MachineBasicBlock &MBB) const {
-   if (DisableInsertVSETVLPHIOpt)
-     return true;
-
-   // Only a virtual register AVL can be defined by a PHI.
-   if (!Require.hasAVLReg() || !Require.getAVLReg().isVirtual())
-     return true;
-   const Register AVLReg = Require.getAVLReg();
-
-   // We need the AVL to be produced by a PHI node in this basic block.
-   const MachineInstr *PHI = MRI->getVRegDef(AVLReg);
-   const bool IsLocalPHI =
-       PHI && PHI->getOpcode() == RISCV::PHI && PHI->getParent() == &MBB;
-   if (!IsLocalPHI)
-     return true;
-
-   // After the def, PHI operands come in (incoming value, predecessor) pairs.
-   const unsigned NumOps = PHI->getNumOperands();
-   for (unsigned Idx = 1; Idx != NumOps; Idx += 2) {
-     const Register InReg = PHI->getOperand(Idx).getReg();
-     const MachineBasicBlock *PredMBB = PHI->getOperand(Idx + 1).getMBB();
-     const VSETVLIInfo &PredExit = BlockInfo[PredMBB->getNumber()].Exit;
-
-     // If the exit from the predecessor has the VTYPE we are looking for
-     // we might be able to avoid a VSETVLI.
-     if (PredExit.isUnknown() || !PredExit.hasSameVTYPE(Require))
-       return true;
-
-     // We need the PHI input to be the output of a VSET(I)VLI.
-     MachineInstr *DefMI = MRI->getVRegDef(InReg);
-     if (!DefMI || !isVectorConfigInstr(*DefMI))
-       return true;
-
-     // We found a VSET(I)VLI; make sure it matches the output of the
-     // predecessor block.
-     const VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
-     const bool MatchesPredExit =
-         DefInfo.hasSameAVL(PredExit) && DefInfo.hasSameVTYPE(PredExit);
-     if (!MatchesPredExit)
-       return true;
-   }
-
-   // If all the incoming values to the PHI checked out, we don't need
-   // to insert a VSETVLI.
-   return false;
- }
- /// Phase 3 worker for one block: starting from the block's recorded entry
- /// state, insert the VSET(I)VLIs required ahead of instructions that carry a
- /// SEW operand and attach implicit VL/VTYPE uses to those instructions.
- void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
- VSETVLIInfo State = BlockInfo[MBB.getNumber()].Pred;
- // True while nothing scanned so far in this block has changed the abstract
- // state.
- bool StillTransparent = true;
- for (MachineInstr &MI : MBB) {
- // Remember the state in front of MI so a change can be detected below.
- const VSETVLIInfo Before = State;
- transferBefore(State, MI);
- // Explicit VSETVLI/VSETIVLI: conservatively keep its VL and VTYPE defs
- // marked live.
- if (isVectorConfigInstr(MI)) {
- MachineOperand &VLDef = MI.getOperand(3);
- MachineOperand &VTypeDef = MI.getOperand(4);
- assert(VLDef.getReg() == RISCV::VL &&
- VTypeDef.getReg() == RISCV::VTYPE &&
- "Unexpected operands where VL and VTYPE should be");
- VLDef.setIsDead(false);
- VTypeDef.setIsDead(false);
- StillTransparent = false;
- }
- const uint64_t Flags = MI.getDesc().TSFlags;
- if (RISCVII::hasSEWOp(Flags)) {
- if (Before != State) {
- // When this is the first implicit state change in the block and the
- // requested state provably yields the same register contents, the
- // actual vsetvli can be skipped: the GPR result of the implicit change
- // would be unused and VL/VTYPE already hold the right values. The
- // abstract state is still treated as changed, because the model can
- // differ even when the registers do not.
- const bool MustEmit = !StillTransparent || needVSETVLIPHI(State, MBB);
- if (MustEmit)
- insertVSETVLI(MBB, MI, State, Before);
- StillTransparent = false;
- }
- if (RISCVII::hasVLOp(Flags)) {
- MachineOperand &AVLOp = MI.getOperand(getVLOpNum(MI));
- if (AVLOp.isReg()) {
- // Drop the explicit AVL register from the instruction.
- AVLOp.setReg(RISCV::NoRegister);
- AVLOp.setIsKill(false);
- }
- MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
- /*isImp*/ true));
- }
- MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
- /*isImp*/ true));
- }
- // Calls, inline asm and anything writing VL or VTYPE end the transparent
- // prefix.
- const bool EndsTransparentPrefix =
- MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
- MI.modifiesRegister(RISCV::VTYPE);
- if (EndsTransparentPrefix)
- StillTransparent = false;
- transferAfter(State, MI);
- }
- const BlockData &Recorded = BlockInfo[MBB.getNumber()];
- if (!UseStrictAsserts) {
- // Without strict asserts, a mismatch between the state reached here and
- // the recorded exit state is repaired by inserting a vsetvli.
- const VSETVLIInfo &ExitInfo = Recorded.Exit;
- if (State.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
- State != ExitInfo) {
- // Note there's an implicit assumption here that terminators never use
- // or modify VL or VTYPE. Also, fallthrough will return end().
- auto InsertPt = MBB.getFirstInstrTerminator();
- insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
- State);
- State = ExitInfo;
- }
- } else if (State.isValid()) {
- // With strict asserts, the state reached here must equal the recorded exit
- // state; print both before asserting when they differ.
- if (State != Recorded.Exit) {
- LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n"
- << " begin state: " << Recorded.Pred << "\n"
- << " expected end state: " << Recorded.Exit << "\n"
- << " actual end state: " << State << "\n");
- }
- assert(State == Recorded.Exit &&
- "InsertVSETVLI dataflow invariant violated");
- }
- }
- /// Returns true when the VL produced by the configuration in \p Info is
- /// guaranteed to equal the AVL it requests.
- static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
- if (Info.hasAVLImm()) {
- // An immediate AVL is fixed when AVL elements of SEW bits each fit within
- // the minimum vector length scaled by LMUL.
- const unsigned ImmAVL = Info.getAVLImm();
- const unsigned ElemBits = Info.getSEW();
- const unsigned RequestedBits = ImmAVL * ElemBits;
- unsigned LMul;
- bool Fractional;
- std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());
- const auto MinVLen = ST.getRealMinVLen();
- const auto AvailableBits = Fractional ? MinVLen / LMul : MinVLen * LMul;
- return AvailableBits >= RequestedBits;
- }
- // VLMAX is always the same value, so only an X0 AVL qualifies.
- // TODO: Could extend to other registers by looking at the associated vreg
- // def placement.
- return Info.getAVLReg() == RISCV::X0;
- }
- /// Simple partial redundancy elimination for the VSETVLI instructions about to
- /// be inserted: when exactly one predecessor leaves with an unknown state and
- /// all others agree on a single state, hoist the transition from the start of
- /// \p MBB to the end of that one predecessor. This targets the common case of
- /// a fixed length vsetvl in a single block loop that can run once in the
- /// preheader instead.
- void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
- // Only blocks entered with an unknown state are candidates.
- if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
- return;
- // Split the predecessors into the single one with an unknown exit state and
- // the common state shared by all remaining ones.
- MachineBasicBlock *UnavailablePred = nullptr;
- VSETVLIInfo AvailableInfo;
- for (MachineBasicBlock *Pred : MBB.predecessors()) {
- const VSETVLIInfo &PredExit = BlockInfo[Pred->getNumber()].Exit;
- if (PredExit.isUnknown()) {
- // A second unknown predecessor defeats the transform.
- if (UnavailablePred)
- return;
- UnavailablePred = Pred;
- continue;
- }
- if (!AvailableInfo.isValid()) {
- AvailableInfo = PredExit;
- continue;
- }
- // The known predecessors disagree with each other.
- if (AvailableInfo != PredExit)
- return;
- }
- // Unreachable, single pred, or full redundancy. Note that FRE is handled by
- // phase 3.
- if (!(UnavailablePred && AvailableInfo.isValid()))
- return;
- // Critical edge - TODO: consider splitting?
- if (UnavailablePred->succ_size() != 1)
- return;
- // If VL can be less than AVL, then we can't reduce the frequency of exec.
- const RISCVSubtarget &ST = MBB.getParent()->getSubtarget<RISCVSubtarget>();
- if (!hasFixedResult(AvailableInfo, ST))
- return;
- // The hoist only pays off if the first SEW-carrying instruction in MBB asks
- // for exactly the available state and no explicit config precedes it.
- bool RemovesTransition = false;
- for (MachineInstr &MI : MBB) {
- if (isVectorConfigInstr(MI))
- return;
- const uint64_t Flags = MI.getDesc().TSFlags;
- if (!RISCVII::hasSEWOp(Flags))
- continue;
- if (AvailableInfo != computeInfoForInstr(MI, Flags, MRI))
- return;
- RemovesTransition = true;
- break;
- }
- if (!RemovesTransition)
- return;
- // Update the data flow state and insert the actual vsetvli together. Keeping
- // the code in sync with the dataflow results is critical for the correctness
- // of phase 3.
- VSETVLIInfo PrevExit = BlockInfo[UnavailablePred->getNumber()].Exit;
- LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
- << UnavailablePred->getName() << " with state "
- << AvailableInfo << "\n");
- BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
- BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
- // Note there's an implicit assumption here that terminators never use
- // or modify VL or VTYPE. Also, fallthrough will return end().
- auto InsertPt = UnavailablePred->getFirstInstrTerminator();
- insertVSETVLI(*UnavailablePred, InsertPt,
- UnavailablePred->findDebugLoc(InsertPt),
- AvailableInfo, PrevExit);
- }
- /// Merge the demanded fields of \p B into \p A: a field ends up set in \p A if
- /// it was set in either input.
- static void doUnion(DemandedFields &A, DemandedFields B) {
- A.VLAny = A.VLAny | B.VLAny;
- A.VLZeroness = A.VLZeroness | B.VLZeroness;
- A.SEW = A.SEW | B.SEW;
- A.LMUL = A.LMUL | B.LMUL;
- A.SEWLMULRatio = A.SEWLMULRatio | B.SEWLMULRatio;
- A.TailPolicy = A.TailPolicy | B.TailPolicy;
- A.MaskPolicy = A.MaskPolicy | B.MaskPolicy;
- }
- /// Returns true if the AVL operand \p MO is either the X0 register or an
- /// immediate other than zero. Any other register yields false.
- static bool isNonZeroAVL(const MachineOperand &MO) {
- if (!MO.isReg()) {
- assert(MO.isImm());
- return MO.getImm() != 0;
- }
- return MO.getReg() == RISCV::X0;
- }
- // Return true if PrevMI can be rewritten to match MI without changing any of
- // the fields in Used, i.e. the ones that would be observed.
- static bool canMutatePriorConfig(const MachineInstr &PrevMI,
- const MachineInstr &MI,
- const DemandedFields &Used) {
- // If the VL values aren't equal, give up when either a) the former VL is
- // demanded, or b) the former cannot be rewritten into the latter for
- // implementation reasons.
- if (!isVLPreservingConfig(MI)) {
- if (Used.VLAny)
- return false;
- // TODO: Requires more care in the mutation...
- if (isVLPreservingConfig(PrevMI))
- return false;
- const MachineOperand &NewAVL = MI.getOperand(1);
- // When only zero vs non-zero matters, both AVLs must be known non-zero.
- // The equally zero case is not handled as it is largely uninteresting.
- if (Used.VLZeroness &&
- !(isNonZeroAVL(NewAVL) && isNonZeroAVL(PrevMI.getOperand(1))))
- return false;
- // TODO: Track whether the register is defined between
- // PrevMI and MI.
- if (NewAVL.isReg() && NewAVL.getReg() != RISCV::X0)
- return false;
- // TODO: We need to change the result register to allow this rewrite
- // without the result forming a vl preserving vsetvli which is not
- // a correct state merge.
- if (PrevMI.getOperand(0).getReg() == RISCV::X0 && NewAVL.isReg())
- return false;
- }
- // Both VTYPE operands must be immediates to be compared.
- const MachineOperand &OldVType = PrevMI.getOperand(2);
- if (!OldVType.isImm())
- return false;
- const MachineOperand &NewVType = MI.getOperand(2);
- if (!NewVType.isImm())
- return false;
- return areCompatibleVTYPEs(OldVType.getImm(), NewVType.getImm(), Used);
- }
- /// Block-local cleanup over explicit vsetvlis. Scans \p MBB bottom-up while
- /// tracking which VL/VTYPE fields later instructions demand. A vector config
- /// instruction whose VL and VTYPE are both unobserved before the following
- /// config is deleted; otherwise the following config is folded into it when
- /// canMutatePriorConfig permits.
- void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
- // Closest surviving vector config instruction below the scan position.
- MachineInstr *LaterConfig = nullptr;
- // Successors may contain arbitrary code, so at the block end both VL and
- // VTYPE count as demanded.
- DemandedFields Demanded;
- Demanded.demandVL();
- Demanded.demandVTYPE();
- SmallVector<MachineInstr*> Doomed;
- for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
- if (!isVectorConfigInstr(MI)) {
- // Ordinary instruction: accumulate what it demands.
- doUnion(Demanded, getDemanded(MI));
- continue;
- }
- // A GPR result that is neither X0 nor a virtual register without non-debug
- // uses means the VL value itself is demanded.
- const Register ResultReg = MI.getOperand(0).getReg();
- if (ResultReg != RISCV::X0 &&
- (!ResultReg.isVirtual() || !MRI->use_nodbg_empty(ResultReg)))
- Demanded.demandVL();
- if (LaterConfig) {
- const bool Observed = Demanded.usedVL() || Demanded.usedVTYPE();
- if (!Observed) {
- // Nothing reads what MI sets up; drop it and keep LaterConfig as is.
- Doomed.push_back(&MI);
- continue;
- }
- if (canMutatePriorConfig(MI, *LaterConfig, Demanded)) {
- // Fold LaterConfig into MI: take over its AVL and opcode unless it
- // preserves VL, then its VTYPE immediate.
- if (!isVLPreservingConfig(*LaterConfig)) {
- const MachineOperand &LaterAVL = LaterConfig->getOperand(1);
- if (LaterAVL.isImm())
- MI.getOperand(1).ChangeToImmediate(LaterAVL.getImm());
- else
- MI.getOperand(1).ChangeToRegister(LaterAVL.getReg(), false);
- MI.setDesc(LaterConfig->getDesc());
- }
- MI.getOperand(2).setImm(LaterConfig->getOperand(2).getImm());
- Doomed.push_back(LaterConfig);
- // Fall through: MI becomes the new reference config.
- }
- }
- LaterConfig = &MI;
- Demanded = getDemanded(MI);
- }
- // Deletion is deferred until the scan has finished.
- for (MachineInstr *Dead : Doomed)
- Dead->eraseFromParent();
- }
- /// For every fault-first load in \p MBB, insert a PseudoReadVL right after it
- /// that defines the load's VL output register (only if that register has
- /// non-debug uses), then redirect the load's own VL output to X0.
- void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
- auto Next = MBB.begin();
- const auto End = MBB.end();
- while (Next != End) {
- MachineInstr &Load = *Next;
- // Step past the instruction first so Next is the insertion point behind it.
- ++Next;
- if (!RISCV::isFaultFirstLoad(Load))
- continue;
- const Register VLOutput = Load.getOperand(1).getReg();
- if (!MRI->use_nodbg_empty(VLOutput))
- BuildMI(MBB, Next, Load.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
- VLOutput);
- // The vl output of the VLEFF/VLSEGFF itself is not used anymore.
- Load.getOperand(1).setReg(RISCV::X0);
- }
- }
- /// Pass entry point. Runs the three phases (per-block collection, dataflow
- /// propagation, vsetvli emission) followed by PRE, the block-local postpass,
- /// dead-result cleanup and PseudoReadVL insertion. Returns true if any block
- /// reported an instruction needing VSETVLI handling.
- bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
- // Nothing to do when the vector extension is not enabled.
- const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
- if (!Subtarget.hasVInstructions())
- return false;
- LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
- TII = Subtarget.getInstrInfo();
- MRI = &MF.getRegInfo();
- assert(BlockInfo.empty() && "Expect empty block infos");
- BlockInfo.resize(MF.getNumBlockIDs());
- // Phase 1 - determine how VL/VTYPE are affected by each block.
- bool SawVectorOp = false;
- for (const MachineBasicBlock &MBB : MF) {
- if (computeVLVTYPEChanges(MBB))
- SawVectorOp = true;
- // The initial exit state is whatever change was found in the block.
- BlockData &Data = BlockInfo[MBB.getNumber()];
- Data.Exit = Data.Change;
- LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
- << " is " << Data.Exit << "\n");
- }
- // Without any instruction that needs VSETVLI there is nothing left to do.
- if (!SawVectorOp) {
- BlockInfo.clear();
- return false;
- }
- // Phase 2 - determine the exit VL/VTYPE from each block. Every block is
- // queued up front; blocks that need revisiting are re-queued while the
- // work list is processed.
- for (const MachineBasicBlock &MBB : MF) {
- BlockInfo[MBB.getNumber()].InQueue = true;
- WorkList.push(&MBB);
- }
- while (!WorkList.empty()) {
- const MachineBasicBlock *Current = WorkList.front();
- WorkList.pop();
- computeIncomingVLVTYPE(*Current);
- }
- // Perform partial redundancy elimination of vsetvli transitions.
- for (MachineBasicBlock &MBB : MF)
- doPRE(MBB);
- // Phase 3 - add any vsetvli instructions needed in the block. The Phase 2
- // information is used to avoid adding vsetvlis before the first vector
- // instruction in the block if the VL/VTYPE is satisfied by its
- // predecessors.
- for (MachineBasicBlock &MBB : MF)
- emitVSETVLIs(MBB);
- // Now that all vsetvlis are explicit, run the block local DSE and the
- // peephole transforms based on demanded fields. This *must* happen outside
- // the main dataflow so long as any cross block analysis is allowed within
- // it: demanded fields based mutation and non-local analysis cannot both be
- // part of the dataflow without introducing inconsistencies.
- for (MachineBasicBlock &MBB : MF)
- doLocalPostpass(MBB);
- // With all rewriting done, look for VSETVLIs that write to an unused
- // destination. For the non X0, X0 variant the destination register can be
- // replaced with X0 to reduce register pressure. This is really a generic
- // optimization which can be applied to any dead def (TODO: generalize).
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- const unsigned Opc = MI.getOpcode();
- if (Opc != RISCV::PseudoVSETVLI && Opc != RISCV::PseudoVSETIVLI)
- continue;
- const Register DestReg = MI.getOperand(0).getReg();
- if (DestReg != RISCV::X0 && MRI->use_nodbg_empty(DestReg))
- MI.getOperand(0).setReg(RISCV::X0);
- }
- }
- // Insert PseudoReadVL after VLEFF/VLSEGFF and use it in place of the vl
- // output of VLEFF/VLSEGFF.
- for (MachineBasicBlock &MBB : MF)
- insertReadVL(MBB);
- BlockInfo.clear();
- return SawVectorOp;
- }
- /// Factory for the Insert VSETVLI pass; returns a newly allocated instance.
- FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
- FunctionPass *Pass = new RISCVInsertVSETVLI();
- return Pass;
- }
|