AArch64PostSelectOptimize.cpp 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. //=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass does post-instruction-selection optimizations in the GlobalISel
  10. // pipeline, before the rest of codegen runs.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "AArch64.h"
  14. #include "AArch64TargetMachine.h"
  15. #include "MCTargetDesc/AArch64MCTargetDesc.h"
  16. #include "llvm/CodeGen/GlobalISel/Utils.h"
  17. #include "llvm/CodeGen/MachineBasicBlock.h"
  18. #include "llvm/CodeGen/MachineFunctionPass.h"
  19. #include "llvm/CodeGen/MachineOperand.h"
  20. #include "llvm/CodeGen/TargetPassConfig.h"
  21. #include "llvm/Support/Debug.h"
  22. #define DEBUG_TYPE "aarch64-post-select-optimize"
  23. using namespace llvm;
  24. namespace {
  25. class AArch64PostSelectOptimize : public MachineFunctionPass {
  26. public:
  27. static char ID;
  28. AArch64PostSelectOptimize();
  29. StringRef getPassName() const override {
  30. return "AArch64 Post Select Optimizer";
  31. }
  32. bool runOnMachineFunction(MachineFunction &MF) override;
  33. void getAnalysisUsage(AnalysisUsage &AU) const override;
  34. private:
  35. bool optimizeNZCVDefs(MachineBasicBlock &MBB);
  36. };
  37. } // end anonymous namespace
  38. void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
  39. AU.addRequired<TargetPassConfig>();
  40. AU.setPreservesCFG();
  41. getSelectionDAGFallbackAnalysisUsage(AU);
  42. MachineFunctionPass::getAnalysisUsage(AU);
  43. }
  44. AArch64PostSelectOptimize::AArch64PostSelectOptimize()
  45. : MachineFunctionPass(ID) {
  46. initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
  47. }
  48. unsigned getNonFlagSettingVariant(unsigned Opc) {
  49. switch (Opc) {
  50. default:
  51. return 0;
  52. case AArch64::SUBSXrr:
  53. return AArch64::SUBXrr;
  54. case AArch64::SUBSWrr:
  55. return AArch64::SUBWrr;
  56. case AArch64::SUBSXrs:
  57. return AArch64::SUBXrs;
  58. case AArch64::SUBSXri:
  59. return AArch64::SUBXri;
  60. case AArch64::SUBSWri:
  61. return AArch64::SUBWri;
  62. }
  63. }
  64. bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
  65. // Consider the following code:
  66. // FCMPSrr %0, %1, implicit-def $nzcv
  67. // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
  68. // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
  69. // FCMPSrr %0, %1, implicit-def $nzcv
  70. // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
  71. // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
  72. // when we have a single IR fcmp being used by two selects. During selection,
  73. // to ensure that there can be no clobbering of nzcv between the fcmp and the
  74. // csel, we have to generate an fcmp immediately before each csel is
  75. // selected.
  76. // However, often we can essentially CSE these together later in MachineCSE.
  77. // This doesn't work though if there are unrelated flag-setting instructions
  78. // in between the two FCMPs. In this case, the SUBS defines NZCV
  79. // but it doesn't have any users, being overwritten by the second FCMP.
  80. //
  81. // Our solution here is to try to convert flag setting operations between
  82. // a interval of identical FCMPs, so that CSE will be able to eliminate one.
  83. bool Changed = false;
  84. auto &MF = *MBB.getParent();
  85. auto &Subtarget = MF.getSubtarget();
  86. const auto &TII = Subtarget.getInstrInfo();
  87. auto TRI = Subtarget.getRegisterInfo();
  88. auto RBI = Subtarget.getRegBankInfo();
  89. auto &MRI = MF.getRegInfo();
  90. // The first step is to find the first and last FCMPs. If we have found
  91. // at least two, then set the limit of the bottom-up walk to the first FCMP
  92. // found since we're only interested in dealing with instructions between
  93. // them.
  94. MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
  95. for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
  96. if (MI.getOpcode() == AArch64::FCMPSrr ||
  97. MI.getOpcode() == AArch64::FCMPDrr) {
  98. if (!FirstCmp)
  99. FirstCmp = &MI;
  100. else
  101. LastCmp = &MI;
  102. }
  103. }
  104. // In addition to converting flag-setting ops in fcmp ranges into non-flag
  105. // setting ops, across the whole basic block we also detect when nzcv
  106. // implicit-defs are dead, and mark them as dead. Peephole optimizations need
  107. // this information later.
  108. LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
  109. LRU.addLiveOuts(MBB);
  110. bool NZCVDead = LRU.available(AArch64::NZCV);
  111. bool InsideCmpRange = false;
  112. for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
  113. LRU.stepBackward(II);
  114. if (LastCmp) { // There's a range present in this block.
  115. // If we're inside an fcmp range, look for begin instruction.
  116. if (InsideCmpRange && &II == FirstCmp)
  117. InsideCmpRange = false;
  118. else if (&II == LastCmp)
  119. InsideCmpRange = true;
  120. }
  121. // Did this instruction define NZCV?
  122. bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
  123. if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
  124. // If we have a def and NZCV is dead, then we may convert this op.
  125. unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
  126. int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
  127. if (DeadNZCVIdx != -1) {
  128. // If we're inside an fcmp range, then convert flag setting ops.
  129. if (InsideCmpRange && NewOpc) {
  130. LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
  131. "op in fcmp range: "
  132. << II);
  133. II.setDesc(TII->get(NewOpc));
  134. II.RemoveOperand(DeadNZCVIdx);
  135. // Changing the opcode can result in differing regclass requirements,
  136. // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
  137. // Constrain the regclasses, possibly introducing a copy.
  138. constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
  139. II.getOperand(0), 0);
  140. Changed |= true;
  141. } else {
  142. // Otherwise, we just set the nzcv imp-def operand to be dead, so the
  143. // peephole optimizations can optimize them further.
  144. II.getOperand(DeadNZCVIdx).setIsDead();
  145. }
  146. }
  147. }
  148. NZCVDead = NZCVDeadAtCurrInstr;
  149. }
  150. return Changed;
  151. }
  152. bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
  153. if (MF.getProperties().hasProperty(
  154. MachineFunctionProperties::Property::FailedISel))
  155. return false;
  156. assert(MF.getProperties().hasProperty(
  157. MachineFunctionProperties::Property::Selected) &&
  158. "Expected a selected MF");
  159. bool Changed = false;
  160. for (auto &BB : MF)
  161. Changed |= optimizeNZCVDefs(BB);
  162. return Changed;
  163. }
  164. char AArch64PostSelectOptimize::ID = 0;
  165. INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
  166. "Optimize AArch64 selected instructions",
  167. false, false)
  168. INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
  169. "Optimize AArch64 selected instructions", false,
  170. false)
  171. namespace llvm {
  172. FunctionPass *createAArch64PostSelectOptimize() {
  173. return new AArch64PostSelectOptimize();
  174. }
  175. } // end namespace llvm