123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167 |
- //===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // In NVPTX, NVPTXFrameLowering will emit the following instructions at the
- // beginning of a MachineFunction.
- //
- // mov %SPL, %depot
- // cvta.local %SP, %SPL
- //
- // Because a frame index is a generic address and alloca can only return a
- // generic pointer, without this pass the instructions producing an alloca'ed
- // address will be based on %SP. NVPTXLowerAlloca tends to help replace stores
- // and loads on this address with their .local versions, but this may introduce
- // a lot of cvta.to.local instructions. Performance can be improved if we avoid
- // casting the address back and forth and directly calculate the local address
- // based on %SPL.
- // This peephole pass optimizes these cases.
- //
- // For example, it will transform the following pattern
- // %0 = LEA_ADDRi64 %VRFrame64, 4
- // %1 = cvta_to_local_yes_64 %0
- //
- // into
- // %1 = LEA_ADDRi64 %VRFrameLocal64, 4
- //
- // %VRFrameLocal64 is the virtual register name of %SPL
- //
- //===----------------------------------------------------------------------===//
- #include "NVPTX.h"
- #include "NVPTXRegisterInfo.h"
- #include "NVPTXSubtarget.h"
- #include "llvm/CodeGen/MachineFunctionPass.h"
- #include "llvm/CodeGen/MachineInstrBuilder.h"
- #include "llvm/CodeGen/MachineRegisterInfo.h"
- #include "llvm/CodeGen/TargetInstrInfo.h"
- #include "llvm/CodeGen/TargetRegisterInfo.h"
- using namespace llvm;
- #define DEBUG_TYPE "nvptx-peephole"
- namespace llvm {
- void initializeNVPTXPeepholePass(PassRegistry &);
- }
- namespace {
- struct NVPTXPeephole : public MachineFunctionPass {
- public:
- static char ID;
- NVPTXPeephole() : MachineFunctionPass(ID) {
- initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
- }
- bool runOnMachineFunction(MachineFunction &MF) override;
- StringRef getPassName() const override {
- return "NVPTX optimize redundant cvta.to.local instruction";
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
- }
- char NVPTXPeephole::ID = 0;
- INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
- static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
- auto &MBB = *Root.getParent();
- auto &MF = *MBB.getParent();
- // Check current instruction is cvta.to.local
- if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
- Root.getOpcode() != NVPTX::cvta_to_local_yes)
- return false;
- auto &Op = Root.getOperand(1);
- const auto &MRI = MF.getRegInfo();
- MachineInstr *GenericAddrDef = nullptr;
- if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) {
- GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
- }
- // Check the register operand is uniquely defined by LEA_ADDRi instruction
- if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
- (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
- GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
- return false;
- }
- const NVPTXRegisterInfo *NRI =
- MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
- // Check the LEA_ADDRi operand is Frame index
- auto &BaseAddrOp = GenericAddrDef->getOperand(1);
- if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) {
- return true;
- }
- return false;
- }
- static void CombineCVTAToLocal(MachineInstr &Root) {
- auto &MBB = *Root.getParent();
- auto &MF = *MBB.getParent();
- const auto &MRI = MF.getRegInfo();
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
- const NVPTXRegisterInfo *NRI =
- MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
- MachineInstrBuilder MIB =
- BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
- Root.getOperand(0).getReg())
- .addReg(NRI->getFrameLocalRegister(MF))
- .add(Prev.getOperand(2));
- MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
- // Check if MRI has only one non dbg use, which is Root
- if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
- Prev.eraseFromParent();
- }
- Root.eraseFromParent();
- }
- bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
- return false;
- bool Changed = false;
- // Loop over all of the basic blocks.
- for (auto &MBB : MF) {
- // Traverse the basic block.
- auto BlockIter = MBB.begin();
- while (BlockIter != MBB.end()) {
- auto &MI = *BlockIter++;
- if (isCVTAToLocalCombinationCandidate(MI)) {
- CombineCVTAToLocal(MI);
- Changed = true;
- }
- } // Instruction
- } // Basic Block
- const NVPTXRegisterInfo *NRI =
- MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
- // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
- const auto &MRI = MF.getRegInfo();
- if (MRI.use_empty(NRI->getFrameRegister(MF))) {
- if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) {
- MI->eraseFromParent();
- }
- }
- return Changed;
- }
- MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
|