//=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does post-instruction-selection optimizations in the GlobalISel
// pipeline, before the rest of codegen runs.
//
//===----------------------------------------------------------------------===//
- #include "AArch64.h"
- #include "AArch64TargetMachine.h"
- #include "MCTargetDesc/AArch64MCTargetDesc.h"
- #include "llvm/ADT/STLExtras.h"
- #include "llvm/CodeGen/GlobalISel/Utils.h"
- #include "llvm/CodeGen/MachineBasicBlock.h"
- #include "llvm/CodeGen/MachineFunctionPass.h"
- #include "llvm/CodeGen/MachineInstr.h"
- #include "llvm/CodeGen/MachineOperand.h"
- #include "llvm/CodeGen/TargetPassConfig.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/ErrorHandling.h"
- #define DEBUG_TYPE "aarch64-post-select-optimize"
- using namespace llvm;
- namespace {
- class AArch64PostSelectOptimize : public MachineFunctionPass {
- public:
- static char ID;
- AArch64PostSelectOptimize();
- StringRef getPassName() const override {
- return "AArch64 Post Select Optimizer";
- }
- bool runOnMachineFunction(MachineFunction &MF) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- private:
- bool optimizeNZCVDefs(MachineBasicBlock &MBB);
- bool doPeepholeOpts(MachineBasicBlock &MBB);
- /// Look for cross regclass copies that can be trivially eliminated.
- bool foldSimpleCrossClassCopies(MachineInstr &MI);
- };
- } // end anonymous namespace
- void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetPassConfig>();
- AU.setPreservesCFG();
- getSelectionDAGFallbackAnalysisUsage(AU);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- AArch64PostSelectOptimize::AArch64PostSelectOptimize()
- : MachineFunctionPass(ID) {
- initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
- }
- unsigned getNonFlagSettingVariant(unsigned Opc) {
- switch (Opc) {
- default:
- return 0;
- case AArch64::SUBSXrr:
- return AArch64::SUBXrr;
- case AArch64::SUBSWrr:
- return AArch64::SUBWrr;
- case AArch64::SUBSXrs:
- return AArch64::SUBXrs;
- case AArch64::SUBSXri:
- return AArch64::SUBXri;
- case AArch64::SUBSWri:
- return AArch64::SUBWri;
- }
- }
- bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) {
- bool Changed = false;
- for (auto &MI : make_early_inc_range(make_range(MBB.begin(), MBB.end()))) {
- Changed |= foldSimpleCrossClassCopies(MI);
- }
- return Changed;
- }
- bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
- auto *MF = MI.getMF();
- auto &MRI = MF->getRegInfo();
- if (!MI.isCopy())
- return false;
- if (MI.getOperand(1).getSubReg())
- return false; // Don't deal with subreg copies
- Register Src = MI.getOperand(1).getReg();
- Register Dst = MI.getOperand(0).getReg();
- if (Src.isPhysical() || Dst.isPhysical())
- return false;
- const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
- const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
- if (SrcRC == DstRC)
- return false;
- if (SrcRC->hasSubClass(DstRC)) {
- // This is the case where the source class is a superclass of the dest, so
- // if the copy is the only user of the source, we can just constrain the
- // source reg to the dest class.
- if (!MRI.hasOneNonDBGUse(Src))
- return false; // Only constrain single uses of the source.
- // Constrain to dst reg class as long as it's not a weird class that only
- // has a few registers.
- if (!MRI.constrainRegClass(Src, DstRC, /* MinNumRegs */ 25))
- return false;
- } else if (DstRC->hasSubClass(SrcRC)) {
- // This is the inverse case, where the destination class is a superclass of
- // the source. Here, if the copy is the only user, we can just constrain
- // the user of the copy to use the smaller class of the source.
- } else {
- return false;
- }
- MRI.replaceRegWith(Dst, Src);
- MI.eraseFromParent();
- return true;
- }
- bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
- // Consider the following code:
- // FCMPSrr %0, %1, implicit-def $nzcv
- // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
- // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
- // FCMPSrr %0, %1, implicit-def $nzcv
- // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
- // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
- // when we have a single IR fcmp being used by two selects. During selection,
- // to ensure that there can be no clobbering of nzcv between the fcmp and the
- // csel, we have to generate an fcmp immediately before each csel is
- // selected.
- // However, often we can essentially CSE these together later in MachineCSE.
- // This doesn't work though if there are unrelated flag-setting instructions
- // in between the two FCMPs. In this case, the SUBS defines NZCV
- // but it doesn't have any users, being overwritten by the second FCMP.
- //
- // Our solution here is to try to convert flag setting operations between
- // a interval of identical FCMPs, so that CSE will be able to eliminate one.
- bool Changed = false;
- auto &MF = *MBB.getParent();
- auto &Subtarget = MF.getSubtarget();
- const auto &TII = Subtarget.getInstrInfo();
- auto TRI = Subtarget.getRegisterInfo();
- auto RBI = Subtarget.getRegBankInfo();
- auto &MRI = MF.getRegInfo();
- // The first step is to find the first and last FCMPs. If we have found
- // at least two, then set the limit of the bottom-up walk to the first FCMP
- // found since we're only interested in dealing with instructions between
- // them.
- MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
- for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
- if (MI.getOpcode() == AArch64::FCMPSrr ||
- MI.getOpcode() == AArch64::FCMPDrr) {
- if (!FirstCmp)
- FirstCmp = &MI;
- else
- LastCmp = &MI;
- }
- }
- // In addition to converting flag-setting ops in fcmp ranges into non-flag
- // setting ops, across the whole basic block we also detect when nzcv
- // implicit-defs are dead, and mark them as dead. Peephole optimizations need
- // this information later.
- LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
- LRU.addLiveOuts(MBB);
- bool NZCVDead = LRU.available(AArch64::NZCV);
- bool InsideCmpRange = false;
- for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
- LRU.stepBackward(II);
- if (LastCmp) { // There's a range present in this block.
- // If we're inside an fcmp range, look for begin instruction.
- if (InsideCmpRange && &II == FirstCmp)
- InsideCmpRange = false;
- else if (&II == LastCmp)
- InsideCmpRange = true;
- }
- // Did this instruction define NZCV?
- bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
- if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
- // If we have a def and NZCV is dead, then we may convert this op.
- unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
- int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
- if (DeadNZCVIdx != -1) {
- // If we're inside an fcmp range, then convert flag setting ops.
- if (InsideCmpRange && NewOpc) {
- LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
- "op in fcmp range: "
- << II);
- II.setDesc(TII->get(NewOpc));
- II.removeOperand(DeadNZCVIdx);
- // Changing the opcode can result in differing regclass requirements,
- // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
- // Constrain the regclasses, possibly introducing a copy.
- constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
- II.getOperand(0), 0);
- Changed |= true;
- } else {
- // Otherwise, we just set the nzcv imp-def operand to be dead, so the
- // peephole optimizations can optimize them further.
- II.getOperand(DeadNZCVIdx).setIsDead();
- }
- }
- }
- NZCVDead = NZCVDeadAtCurrInstr;
- }
- return Changed;
- }
- bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
- if (MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::FailedISel))
- return false;
- assert(MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::Selected) &&
- "Expected a selected MF");
- bool Changed = false;
- for (auto &BB : MF) {
- Changed |= optimizeNZCVDefs(BB);
- Changed |= doPeepholeOpts(BB);
- }
- return Changed;
- }
- char AArch64PostSelectOptimize::ID = 0;
- INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
- "Optimize AArch64 selected instructions",
- false, false)
- INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
- "Optimize AArch64 selected instructions", false,
- false)
- namespace llvm {
- FunctionPass *createAArch64PostSelectOptimize() {
- return new AArch64PostSelectOptimize();
- }
- } // end namespace llvm
|