123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900 |
- //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file defines the pass that finds instructions that can be
- // re-written as LEA instructions in order to reduce pipeline delays.
- // It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
- //
- //===----------------------------------------------------------------------===//
- #include "X86.h"
- #include "X86InstrInfo.h"
- #include "X86Subtarget.h"
- #include "llvm/ADT/Statistic.h"
- #include "llvm/Analysis/ProfileSummaryInfo.h"
- #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
- #include "llvm/CodeGen/MachineFunctionPass.h"
- #include "llvm/CodeGen/MachineInstrBuilder.h"
- #include "llvm/CodeGen/MachineSizeOpts.h"
- #include "llvm/CodeGen/Passes.h"
- #include "llvm/CodeGen/TargetSchedule.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/raw_ostream.h"
- using namespace llvm;
- #define FIXUPLEA_DESC "X86 LEA Fixup"
- #define FIXUPLEA_NAME "x86-fixup-LEAs"
- #define DEBUG_TYPE FIXUPLEA_NAME
- STATISTIC(NumLEAs, "Number of LEA instructions created");
- namespace {
- class FixupLEAPass : public MachineFunctionPass {
- enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
- /// Given a machine register, look for the instruction
- /// which writes it in the current basic block. If found,
- /// try to replace it with an equivalent LEA instruction.
- /// If replacement succeeds, then also process the newly created
- /// instruction.
- void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB);
- /// Given a memory access or LEA instruction
- /// whose address mode uses a base and/or index register, look for
- /// an opportunity to replace the instruction which sets the base or index
- /// register with an equivalent LEA instruction.
- void processInstruction(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB);
- /// Given a LEA instruction which is unprofitable
- /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
- void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB);
- /// Given a LEA instruction which is unprofitable
- /// on SNB+ try to replace it with other instructions.
- /// According to Intel's Optimization Reference Manual:
- /// " For LEA instructions with three source operands and some specific
- /// situations, instruction latency has increased to 3 cycles, and must
- /// dispatch via port 1:
- /// - LEA that has all three source operands: base, index, and offset
- /// - LEA that uses base and index registers where the base is EBP, RBP,
- /// or R13
- /// - LEA that uses RIP relative addressing mode
- /// - LEA that uses 16-bit addressing mode "
- /// This function currently handles the first 2 cases only.
- void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB, bool OptIncDec);
- /// Look for LEAs that are really two address LEAs that we might be able to
- /// turn into regular ADD instructions.
- bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB, bool OptIncDec,
- bool UseLEAForSP) const;
- /// Look for and transform the sequence
- /// lea (reg1, reg2), reg3
- /// sub reg3, reg4
- /// to
- /// sub reg1, reg4
- /// sub reg2, reg4
- /// It can also optimize the sequence lea/add similarly.
- bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
- /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
- /// the dest register of LEA instruction I.
- MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB) const;
- /// Check instructions between LeaI and AluI (exclusively).
- /// Set BaseIndexDef to true if base or index register from LeaI is defined.
- /// Set AluDestRef to true if the dest register of AluI is used or defined.
- /// *KilledBase is set to the killed base register usage.
- /// *KilledIndex is set to the killed index register usage.
- void checkRegUsage(MachineBasicBlock::iterator &LeaI,
- MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
- bool &AluDestRef, MachineOperand **KilledBase,
- MachineOperand **KilledIndex) const;
- /// Determine if an instruction references a machine register
- /// and, if so, whether it reads or writes the register.
- RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
- /// Step backwards through a basic block, looking
- /// for an instruction which writes a register within
- /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
- MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
- MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB);
- /// if an instruction can be converted to an
- /// equivalent LEA, insert the new instruction into the basic block
- /// and return a pointer to it. Otherwise, return zero.
- MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI) const;
- public:
- static char ID;
- StringRef getPassName() const override { return FIXUPLEA_DESC; }
- FixupLEAPass() : MachineFunctionPass(ID) { }
- /// Loop over all of the basic blocks,
- /// replacing instructions by equivalent LEA instructions
- /// if needed and when possible.
- bool runOnMachineFunction(MachineFunction &MF) override;
- // This pass runs after regalloc and doesn't support VReg operands.
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- private:
- TargetSchedModel TSM;
- const X86InstrInfo *TII = nullptr;
- const X86RegisterInfo *TRI = nullptr;
- };
- }
- char FixupLEAPass::ID = 0;
- INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
- MachineInstr *
- FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI) const {
- MachineInstr &MI = *MBBI;
- switch (MI.getOpcode()) {
- case X86::MOV32rr:
- case X86::MOV64rr: {
- const MachineOperand &Src = MI.getOperand(1);
- const MachineOperand &Dest = MI.getOperand(0);
- MachineInstr *NewMI =
- BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
- : X86::LEA64r))
- .add(Dest)
- .add(Src)
- .addImm(1)
- .addReg(0)
- .addImm(0)
- .addReg(0);
- return NewMI;
- }
- }
- if (!MI.isConvertibleTo3Addr())
- return nullptr;
- switch (MI.getOpcode()) {
- default:
- // Only convert instructions that we've verified are safe.
- return nullptr;
- case X86::ADD64ri32:
- case X86::ADD64ri8:
- case X86::ADD64ri32_DB:
- case X86::ADD64ri8_DB:
- case X86::ADD32ri:
- case X86::ADD32ri8:
- case X86::ADD32ri_DB:
- case X86::ADD32ri8_DB:
- if (!MI.getOperand(2).isImm()) {
- // convertToThreeAddress will call getImm()
- // which requires isImm() to be true
- return nullptr;
- }
- break;
- case X86::SHL64ri:
- case X86::SHL32ri:
- case X86::INC64r:
- case X86::INC32r:
- case X86::DEC64r:
- case X86::DEC32r:
- case X86::ADD64rr:
- case X86::ADD64rr_DB:
- case X86::ADD32rr:
- case X86::ADD32rr_DB:
- // These instructions are all fine to convert.
- break;
- }
- return TII->convertToThreeAddress(MI, nullptr, nullptr);
- }
- FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
- static bool isLEA(unsigned Opcode) {
- return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
- Opcode == X86::LEA64_32r;
- }
- bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
- return false;
- const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- bool IsSlowLEA = ST.slowLEA();
- bool IsSlow3OpsLEA = ST.slow3OpsLEA();
- bool LEAUsesAG = ST.LEAusesAG();
- bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
- bool UseLEAForSP = ST.useLeaForSP();
- TSM.init(&ST);
- TII = ST.getInstrInfo();
- TRI = ST.getRegisterInfo();
- auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto *MBFI = (PSI && PSI->hasProfileSummary())
- ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
- : nullptr;
- LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
- for (MachineBasicBlock &MBB : MF) {
- // First pass. Try to remove or optimize existing LEAs.
- bool OptIncDecPerBB =
- OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
- if (!isLEA(I->getOpcode()))
- continue;
- if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
- continue;
- if (IsSlowLEA)
- processInstructionForSlowLEA(I, MBB);
- else if (IsSlow3OpsLEA)
- processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
- }
- // Second pass for creating LEAs. This may reverse some of the
- // transformations above.
- if (LEAUsesAG) {
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- processInstruction(I, MBB);
- }
- }
- LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
- return true;
- }
- FixupLEAPass::RegUsageState
- FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
- RegUsageState RegUsage = RU_NotUsed;
- MachineInstr &MI = *I;
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.getReg() == p.getReg()) {
- if (MO.isDef())
- return RU_Write;
- RegUsage = RU_Read;
- }
- }
- return RegUsage;
- }
- /// getPreviousInstr - Given a reference to an instruction in a basic
- /// block, return a reference to the previous instruction in the block,
- /// wrapping around to the last instruction of the block if the block
- /// branches to itself.
- static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB) {
- if (I == MBB.begin()) {
- if (MBB.isPredecessor(&MBB)) {
- I = --MBB.end();
- return true;
- } else
- return false;
- }
- --I;
- return true;
- }
- MachineBasicBlock::iterator
- FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB) {
- int InstrDistance = 1;
- MachineBasicBlock::iterator CurInst;
- static const int INSTR_DISTANCE_THRESHOLD = 5;
- CurInst = I;
- bool Found;
- Found = getPreviousInstr(CurInst, MBB);
- while (Found && I != CurInst) {
- if (CurInst->isCall() || CurInst->isInlineAsm())
- break;
- if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
- break; // too far back to make a difference
- if (usesRegister(p, CurInst) == RU_Write) {
- return CurInst;
- }
- InstrDistance += TSM.computeInstrLatency(&*CurInst);
- Found = getPreviousInstr(CurInst, MBB);
- }
- return MachineBasicBlock::iterator();
- }
- static inline bool isInefficientLEAReg(unsigned Reg) {
- return Reg == X86::EBP || Reg == X86::RBP ||
- Reg == X86::R13D || Reg == X86::R13;
- }
- /// Returns true if this LEA uses base an index registers, and the base register
- /// is known to be inefficient for the subtarget.
- // TODO: use a variant scheduling class to model the latency profile
- // of LEA instructions, and implement this logic as a scheduling predicate.
- static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
- const MachineOperand &Index) {
- return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
- Index.getReg() != X86::NoRegister;
- }
- static inline bool hasLEAOffset(const MachineOperand &Offset) {
- return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
- }
- static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
- switch (LEAOpcode) {
- default:
- llvm_unreachable("Unexpected LEA instruction");
- case X86::LEA32r:
- case X86::LEA64_32r:
- return X86::ADD32rr;
- case X86::LEA64r:
- return X86::ADD64rr;
- }
- }
- static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
- switch (LEAOpcode) {
- default:
- llvm_unreachable("Unexpected LEA instruction");
- case X86::LEA32r:
- case X86::LEA64_32r:
- return X86::SUB32rr;
- case X86::LEA64r:
- return X86::SUB64rr;
- }
- }
- static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
- const MachineOperand &Offset) {
- bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
- switch (LEAOpcode) {
- default:
- llvm_unreachable("Unexpected LEA instruction");
- case X86::LEA32r:
- case X86::LEA64_32r:
- return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
- case X86::LEA64r:
- return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
- }
- }
- static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
- switch (LEAOpcode) {
- default:
- llvm_unreachable("Unexpected LEA instruction");
- case X86::LEA32r:
- case X86::LEA64_32r:
- return IsINC ? X86::INC32r : X86::DEC32r;
- case X86::LEA64r:
- return IsINC ? X86::INC64r : X86::DEC64r;
- }
- }
- MachineBasicBlock::iterator
- FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB) const {
- const int InstrDistanceThreshold = 5;
- int InstrDistance = 1;
- MachineBasicBlock::iterator CurInst = std::next(I);
- unsigned LEAOpcode = I->getOpcode();
- unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
- unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
- Register DestReg = I->getOperand(0).getReg();
- while (CurInst != MBB.end()) {
- if (CurInst->isCall() || CurInst->isInlineAsm())
- break;
- if (InstrDistance > InstrDistanceThreshold)
- break;
- // Check if the lea dest register is used in an add/sub instruction only.
- for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
- MachineOperand &Opnd = CurInst->getOperand(I);
- if (Opnd.isReg()) {
- if (Opnd.getReg() == DestReg) {
- if (Opnd.isDef() || !Opnd.isKill())
- return MachineBasicBlock::iterator();
- unsigned AluOpcode = CurInst->getOpcode();
- if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
- return MachineBasicBlock::iterator();
- MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
- MachineOperand AluDest = CurInst->getOperand(0);
- if (Opnd2.getReg() != AluDest.getReg())
- return MachineBasicBlock::iterator();
- // X - (Y + Z) may generate different flags than (X - Y) - Z when
- // there is overflow. So we can't change the alu instruction if the
- // flags register is live.
- if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
- return MachineBasicBlock::iterator();
- return CurInst;
- }
- if (TRI->regsOverlap(DestReg, Opnd.getReg()))
- return MachineBasicBlock::iterator();
- }
- }
- InstrDistance++;
- ++CurInst;
- }
- return MachineBasicBlock::iterator();
- }
- void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
- MachineBasicBlock::iterator &AluI,
- bool &BaseIndexDef, bool &AluDestRef,
- MachineOperand **KilledBase,
- MachineOperand **KilledIndex) const {
- BaseIndexDef = AluDestRef = false;
- *KilledBase = *KilledIndex = nullptr;
- Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
- Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
- Register AluDestReg = AluI->getOperand(0).getReg();
- MachineBasicBlock::iterator CurInst = std::next(LeaI);
- while (CurInst != AluI) {
- for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
- MachineOperand &Opnd = CurInst->getOperand(I);
- if (!Opnd.isReg())
- continue;
- Register Reg = Opnd.getReg();
- if (TRI->regsOverlap(Reg, AluDestReg))
- AluDestRef = true;
- if (TRI->regsOverlap(Reg, BaseReg)) {
- if (Opnd.isDef())
- BaseIndexDef = true;
- else if (Opnd.isKill())
- *KilledBase = &Opnd;
- }
- if (TRI->regsOverlap(Reg, IndexReg)) {
- if (Opnd.isDef())
- BaseIndexDef = true;
- else if (Opnd.isKill())
- *KilledIndex = &Opnd;
- }
- }
- ++CurInst;
- }
- }
- bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB) const {
- // Look for an add/sub instruction which uses the result of lea.
- MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
- if (AluI == MachineBasicBlock::iterator())
- return false;
- // Check if there are any related register usage between lea and alu.
- bool BaseIndexDef, AluDestRef;
- MachineOperand *KilledBase, *KilledIndex;
- checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);
- MachineBasicBlock::iterator InsertPos = AluI;
- if (BaseIndexDef) {
- if (AluDestRef)
- return false;
- InsertPos = I;
- KilledBase = KilledIndex = nullptr;
- }
- // Check if there are same registers.
- Register AluDestReg = AluI->getOperand(0).getReg();
- Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
- Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
- if (I->getOpcode() == X86::LEA64_32r) {
- BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
- IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
- }
- if (AluDestReg == IndexReg) {
- if (BaseReg == IndexReg)
- return false;
- std::swap(BaseReg, IndexReg);
- std::swap(KilledBase, KilledIndex);
- }
- if (BaseReg == IndexReg)
- KilledBase = nullptr;
- // Now it's safe to change instructions.
- MachineInstr *NewMI1, *NewMI2;
- unsigned NewOpcode = AluI->getOpcode();
- NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
- AluDestReg)
- .addReg(AluDestReg, RegState::Kill)
- .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
- NewMI1->addRegisterDead(X86::EFLAGS, TRI);
- NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
- AluDestReg)
- .addReg(AluDestReg, RegState::Kill)
- .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
- NewMI2->addRegisterDead(X86::EFLAGS, TRI);
- // Clear the old Kill flags.
- if (KilledBase)
- KilledBase->setIsKill(false);
- if (KilledIndex)
- KilledIndex->setIsKill(false);
- MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
- MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
- MBB.erase(I);
- MBB.erase(AluI);
- I = NewMI1;
- return true;
- }
- bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB, bool OptIncDec,
- bool UseLEAForSP) const {
- MachineInstr &MI = *I;
- const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
- const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
- const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
- const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
- const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
- if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
- MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
- MachineBasicBlock::LQR_Dead)
- return false;
- Register DestReg = MI.getOperand(0).getReg();
- Register BaseReg = Base.getReg();
- Register IndexReg = Index.getReg();
- // Don't change stack adjustment LEAs.
- if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
- return false;
- // LEA64_32 has 64-bit operands but 32-bit result.
- if (MI.getOpcode() == X86::LEA64_32r) {
- if (BaseReg != 0)
- BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
- if (IndexReg != 0)
- IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
- }
- MachineInstr *NewMI = nullptr;
- // Case 1.
- // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
- // which can be turned into add %reg2, %reg1
- if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
- (DestReg == BaseReg || DestReg == IndexReg)) {
- unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
- if (DestReg != BaseReg)
- std::swap(BaseReg, IndexReg);
- if (MI.getOpcode() == X86::LEA64_32r) {
- // TODO: Do we need the super register implicit use?
- NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
- .addReg(BaseReg).addReg(IndexReg)
- .addReg(Base.getReg(), RegState::Implicit)
- .addReg(Index.getReg(), RegState::Implicit);
- } else {
- NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
- .addReg(BaseReg).addReg(IndexReg);
- }
- } else if (DestReg == BaseReg && IndexReg == 0) {
- // Case 2.
- // This is an LEA with only a base register and a displacement,
- // We can use ADDri or INC/DEC.
- // Does this LEA have one these forms:
- // lea %reg, 1(%reg)
- // lea %reg, -1(%reg)
- if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
- bool IsINC = Disp.getImm() == 1;
- unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
- if (MI.getOpcode() == X86::LEA64_32r) {
- // TODO: Do we need the super register implicit use?
- NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
- .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
- } else {
- NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
- .addReg(BaseReg);
- }
- } else {
- unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
- if (MI.getOpcode() == X86::LEA64_32r) {
- // TODO: Do we need the super register implicit use?
- NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
- .addReg(BaseReg).addImm(Disp.getImm())
- .addReg(Base.getReg(), RegState::Implicit);
- } else {
- NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
- .addReg(BaseReg).addImm(Disp.getImm());
- }
- }
- } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
- // Case 3.
- // Look for and transform the sequence
- // lea (reg1, reg2), reg3
- // sub reg3, reg4
- return optLEAALU(I, MBB);
- } else
- return false;
- MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
- MBB.erase(I);
- I = NewMI;
- return true;
- }
- void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB) {
- // Process a load, store, or LEA instruction.
- MachineInstr &MI = *I;
- const MCInstrDesc &Desc = MI.getDesc();
- int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
- if (AddrOffset >= 0) {
- AddrOffset += X86II::getOperandBias(Desc);
- MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
- if (p.isReg() && p.getReg() != X86::ESP) {
- seekLEAFixup(p, I, MBB);
- }
- MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
- if (q.isReg() && q.getReg() != X86::ESP) {
- seekLEAFixup(q, I, MBB);
- }
- }
- }
- void FixupLEAPass::seekLEAFixup(MachineOperand &p,
- MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB) {
- MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
- if (MBI != MachineBasicBlock::iterator()) {
- MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
- if (NewMI) {
- ++NumLEAs;
- LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
- // now to replace with an equivalent LEA...
- LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
- MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
- MBB.erase(MBI);
- MachineBasicBlock::iterator J =
- static_cast<MachineBasicBlock::iterator>(NewMI);
- processInstruction(J, MBB);
- }
- }
- }
- void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB) {
- MachineInstr &MI = *I;
- const unsigned Opcode = MI.getOpcode();
- const MachineOperand &Dst = MI.getOperand(0);
- const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
- const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
- const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
- const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
- const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
- if (Segment.getReg() != 0 || !Offset.isImm() ||
- MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
- MachineBasicBlock::LQR_Dead)
- return;
- const Register DstR = Dst.getReg();
- const Register SrcR1 = Base.getReg();
- const Register SrcR2 = Index.getReg();
- if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
- return;
- if (Scale.getImm() > 1)
- return;
- LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
- LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
- MachineInstr *NewMI = nullptr;
- // Make ADD instruction for two registers writing to LEA's destination
- if (SrcR1 != 0 && SrcR2 != 0) {
- const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
- const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
- NewMI =
- BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
- LLVM_DEBUG(NewMI->dump(););
- }
- // Make ADD instruction for immediate
- if (Offset.getImm() != 0) {
- const MCInstrDesc &ADDri =
- TII->get(getADDriFromLEA(Opcode, Offset));
- const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
- NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
- .add(SrcR)
- .addImm(Offset.getImm());
- LLVM_DEBUG(NewMI->dump(););
- }
- if (NewMI) {
- MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
- MBB.erase(I);
- I = NewMI;
- }
- }
- void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
- MachineBasicBlock &MBB,
- bool OptIncDec) {
- MachineInstr &MI = *I;
- const unsigned LEAOpcode = MI.getOpcode();
- const MachineOperand &Dest = MI.getOperand(0);
- const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
- const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
- const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
- const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
- const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
- if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
- MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
- MachineBasicBlock::LQR_Dead ||
- Segment.getReg() != X86::NoRegister)
- return;
- Register DestReg = Dest.getReg();
- Register BaseReg = Base.getReg();
- Register IndexReg = Index.getReg();
- if (MI.getOpcode() == X86::LEA64_32r) {
- if (BaseReg != 0)
- BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
- if (IndexReg != 0)
- IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
- }
- bool IsScale1 = Scale.getImm() == 1;
- bool IsInefficientBase = isInefficientLEAReg(BaseReg);
- bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
- // Skip these cases since it takes more than 2 instructions
- // to replace the LEA instruction.
- if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
- return;
- LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
- LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
- MachineInstr *NewMI = nullptr;
- // First try to replace LEA with one or two (for the 3-op LEA case)
- // add instructions:
- // 1.lea (%base,%index,1), %base => add %index,%base
- // 2.lea (%base,%index,1), %index => add %base,%index
- if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
- unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
- if (DestReg != BaseReg)
- std::swap(BaseReg, IndexReg);
- if (MI.getOpcode() == X86::LEA64_32r) {
- // TODO: Do we need the super register implicit use?
- NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
- .addReg(BaseReg)
- .addReg(IndexReg)
- .addReg(Base.getReg(), RegState::Implicit)
- .addReg(Index.getReg(), RegState::Implicit);
- } else {
- NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
- .addReg(BaseReg)
- .addReg(IndexReg);
- }
- } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
- // If the base is inefficient try switching the index and base operands,
- // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
- // lea offset(%base,%index,scale),%dst =>
- // lea (%base,%index,scale); add offset,%dst
- NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
- .add(Dest)
- .add(IsInefficientBase ? Index : Base)
- .add(Scale)
- .add(IsInefficientBase ? Base : Index)
- .addImm(0)
- .add(Segment);
- LLVM_DEBUG(NewMI->dump(););
- }
- // If either replacement succeeded above, add the offset if needed, then
- // replace the instruction.
- if (NewMI) {
- // Create ADD instruction for the Offset in case of 3-Ops LEA.
- if (hasLEAOffset(Offset)) {
- if (OptIncDec && Offset.isImm() &&
- (Offset.getImm() == 1 || Offset.getImm() == -1)) {
- unsigned NewOpc =
- getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
- NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
- .addReg(DestReg);
- LLVM_DEBUG(NewMI->dump(););
- } else {
- unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
- NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
- .addReg(DestReg)
- .add(Offset);
- LLVM_DEBUG(NewMI->dump(););
- }
- }
- MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
- MBB.erase(I);
- I = NewMI;
- return;
- }
- // Handle the rest of the cases with inefficient base register:
- assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
- assert(IsInefficientBase && "efficient base should be handled already!");
- // FIXME: Handle LEA64_32r.
- if (LEAOpcode == X86::LEA64_32r)
- return;
- // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
- if (IsScale1 && !hasLEAOffset(Offset)) {
- bool BIK = Base.isKill() && BaseReg != IndexReg;
- TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
- LLVM_DEBUG(MI.getPrevNode()->dump(););
- unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
- NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
- .addReg(DestReg)
- .add(Index);
- LLVM_DEBUG(NewMI->dump(););
- MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
- MBB.erase(I);
- I = NewMI;
- return;
- }
- // lea offset(%base,%index,scale), %dst =>
- // lea offset( ,%index,scale), %dst; add %base,%dst
- NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
- .add(Dest)
- .addReg(0)
- .add(Scale)
- .add(Index)
- .add(Offset)
- .add(Segment);
- LLVM_DEBUG(NewMI->dump(););
- unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
- NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
- .addReg(DestReg)
- .add(Base);
- LLVM_DEBUG(NewMI->dump(););
- MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
- MBB.erase(I);
- I = NewMI;
- }
|