NVPTXPeephole.cpp 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. //===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // In NVPTX, NVPTXFrameLowering emits the following instructions at the
  10. // beginning of a MachineFunction.
  11. //
  12. // mov %SPL, %depot
  13. // cvta.local %SP, %SPL
  14. //
  15. // Because Frame Index is a generic address and alloca can only return generic
  16. // pointer, without this pass the instructions producing alloca'ed address will
  17. // be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
  18. // this address with their .local versions, but this may introduce a lot of
  19. // cvta.to.local instructions. Performance can be improved if we avoid casting
  20. // address back and forth and directly calculate local address based on %SPL.
  21. // This peephole pass optimizes these cases.
  22. //
  23. // For example, it will transform the following pattern
  24. // %0 = LEA_ADDRi64 %VRFrame64, 4
  25. // %1 = cvta_to_local_yes_64 %0
  26. //
  27. // into
  28. // %1 = LEA_ADDRi64 %VRFrameLocal64, 4
  29. //
  30. // %VRFrameLocal64 is the virtual register name of %SPL
  31. //
  32. //===----------------------------------------------------------------------===//
  33. #include "NVPTX.h"
  34. #include "NVPTXRegisterInfo.h"
  35. #include "NVPTXSubtarget.h"
  36. #include "llvm/CodeGen/MachineFunctionPass.h"
  37. #include "llvm/CodeGen/MachineInstrBuilder.h"
  38. #include "llvm/CodeGen/MachineRegisterInfo.h"
  39. #include "llvm/CodeGen/TargetInstrInfo.h"
  40. #include "llvm/CodeGen/TargetRegisterInfo.h"
  41. using namespace llvm;
  42. #define DEBUG_TYPE "nvptx-peephole"
  43. namespace llvm {
  44. void initializeNVPTXPeepholePass(PassRegistry &);
  45. }
  46. namespace {
  47. struct NVPTXPeephole : public MachineFunctionPass {
  48. public:
  49. static char ID;
  50. NVPTXPeephole() : MachineFunctionPass(ID) {
  51. initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
  52. }
  53. bool runOnMachineFunction(MachineFunction &MF) override;
  54. StringRef getPassName() const override {
  55. return "NVPTX optimize redundant cvta.to.local instruction";
  56. }
  57. void getAnalysisUsage(AnalysisUsage &AU) const override {
  58. MachineFunctionPass::getAnalysisUsage(AU);
  59. }
  60. };
  61. }
  62. char NVPTXPeephole::ID = 0;
  63. INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
  64. static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
  65. auto &MBB = *Root.getParent();
  66. auto &MF = *MBB.getParent();
  67. // Check current instruction is cvta.to.local
  68. if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
  69. Root.getOpcode() != NVPTX::cvta_to_local_yes)
  70. return false;
  71. auto &Op = Root.getOperand(1);
  72. const auto &MRI = MF.getRegInfo();
  73. MachineInstr *GenericAddrDef = nullptr;
  74. if (Op.isReg() && Op.getReg().isVirtual()) {
  75. GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
  76. }
  77. // Check the register operand is uniquely defined by LEA_ADDRi instruction
  78. if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
  79. (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
  80. GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
  81. return false;
  82. }
  83. const NVPTXRegisterInfo *NRI =
  84. MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
  85. // Check the LEA_ADDRi operand is Frame index
  86. auto &BaseAddrOp = GenericAddrDef->getOperand(1);
  87. if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) {
  88. return true;
  89. }
  90. return false;
  91. }
  92. static void CombineCVTAToLocal(MachineInstr &Root) {
  93. auto &MBB = *Root.getParent();
  94. auto &MF = *MBB.getParent();
  95. const auto &MRI = MF.getRegInfo();
  96. const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  97. auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
  98. const NVPTXRegisterInfo *NRI =
  99. MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
  100. MachineInstrBuilder MIB =
  101. BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
  102. Root.getOperand(0).getReg())
  103. .addReg(NRI->getFrameLocalRegister(MF))
  104. .add(Prev.getOperand(2));
  105. MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
  106. // Check if MRI has only one non dbg use, which is Root
  107. if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
  108. Prev.eraseFromParent();
  109. }
  110. Root.eraseFromParent();
  111. }
  112. bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
  113. if (skipFunction(MF.getFunction()))
  114. return false;
  115. bool Changed = false;
  116. // Loop over all of the basic blocks.
  117. for (auto &MBB : MF) {
  118. // Traverse the basic block.
  119. auto BlockIter = MBB.begin();
  120. while (BlockIter != MBB.end()) {
  121. auto &MI = *BlockIter++;
  122. if (isCVTAToLocalCombinationCandidate(MI)) {
  123. CombineCVTAToLocal(MI);
  124. Changed = true;
  125. }
  126. } // Instruction
  127. } // Basic Block
  128. const NVPTXRegisterInfo *NRI =
  129. MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
  130. // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
  131. const auto &MRI = MF.getRegInfo();
  132. if (MRI.use_empty(NRI->getFrameRegister(MF))) {
  133. if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) {
  134. MI->eraseFromParent();
  135. }
  136. }
  137. return Changed;
  138. }
  139. MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }