X86FixupLEAs.cpp

//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"

#define DEBUG_TYPE FIXUPLEA_NAME

STATISTIC(NumLEAs, "Number of LEA instructions created");

namespace {
class FixupLEAPass : public MachineFunctionPass {
  enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };

  /// Given a machine register, look for the instruction
  /// which writes it in the current basic block. If found,
  /// try to replace it with an equivalent LEA instruction.
  /// If replacement succeeds, then also process the newly created
  /// instruction.
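  /// For example, a register-to-register copy that feeds an address
  /// computation can become an LEA (an illustrative sketch; registers
  /// are arbitrary):
  ///   movl %esi, %ecx  ->  leal (%esi), %ecx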
  void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
                    MachineBasicBlock &MBB);

  /// Given a memory access or LEA instruction
  /// whose address mode uses a base and/or index register, look for
  /// an opportunity to replace the instruction which sets the base or index
  /// register with an equivalent LEA instruction.
  void processInstruction(MachineBasicBlock::iterator &I,
                          MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
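  /// For example (an illustrative sketch; registers are arbitrary):
  ///   leal 4(%esi), %esi  ->  addl $4, %esi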
  void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB);

  /// Given a LEA instruction which is unprofitable
  /// on SNB+ try to replace it with other instructions.
  /// According to Intel's Optimization Reference Manual:
  /// " For LEA instructions with three source operands and some specific
  ///   situations, instruction latency has increased to 3 cycles, and must
  ///   dispatch via port 1:
  /// - LEA that has all three source operands: base, index, and offset
  /// - LEA that uses base and index registers where the base is EBP, RBP,
  ///   or R13
  /// - LEA that uses RIP relative addressing mode
  /// - LEA that uses 16-bit addressing mode "
  /// This function currently handles the first 2 cases only.
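  /// For example, a 3-operand LEA can usually be split into a cheaper
  /// 2-operand LEA plus an ADD (an illustrative sketch; registers are
  /// arbitrary):
  ///   leaq 8(%rbx,%rcx), %rdx  ->  leaq (%rbx,%rcx), %rdx; addq $8, %rdx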
  void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec);

  /// Look for LEAs that are really two address LEAs that we might be able to
  /// turn into regular ADD instructions.
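  /// For example (an illustrative sketch; registers are arbitrary):
  ///   leaq (%rdi,%rsi), %rdi  ->  addq %rsi, %rdi
  ///   leaq 1(%rdi), %rdi      ->  incq %rdi  (when INC/DEC is profitable)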
  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
                     MachineBasicBlock &MBB, bool OptIncDec,
                     bool UseLEAForSP) const;

  /// Look for and transform the sequence
  ///     lea (reg1, reg2), reg3
  ///     sub reg3, reg4
  /// to
  ///     sub reg1, reg4
  ///     sub reg2, reg4
  /// It can also optimize the sequence lea/add similarly.
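  /// The lea/add form follows the same pattern (sketch):
  ///     lea (reg1, reg2), reg3
  ///     add reg3, reg4
  /// to
  ///     add reg1, reg4
  ///     add reg2, reg4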
  bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;

  /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
  /// the dest register of LEA instruction I.
  MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
                                            MachineBasicBlock &MBB) const;

  /// Check instructions between LeaI and AluI (exclusive).
  /// Set BaseIndexDef to true if base or index register from LeaI is defined.
  /// Set AluDestRef to true if the dest register of AluI is used or defined.
  /// *KilledBase is set to the killed base register usage.
  /// *KilledIndex is set to the killed index register usage.
  void checkRegUsage(MachineBasicBlock::iterator &LeaI,
                     MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
                     bool &AluDestRef, MachineOperand **KilledBase,
                     MachineOperand **KilledIndex) const;

  /// Determine if an instruction references a machine register
  /// and, if so, whether it reads or writes the register.
  RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);

  /// Step backwards through a basic block, looking
  /// for an instruction which writes a register within
  /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
  MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
                                              MachineBasicBlock::iterator &I,
                                              MachineBasicBlock &MBB);

  /// If an instruction can be converted to an
  /// equivalent LEA, insert the new instruction into the basic block
  /// and return a pointer to it. Otherwise, return zero.
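  /// For example, an add of an immediate whose flags are dead can become
  /// an LEA (an illustrative sketch; the exact opcode and operands are
  /// chosen by convertToThreeAddress):
  ///   addl $4, %eax  ->  leal 4(%rax), %eax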
  MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI) const;

public:
  static char ID;

  StringRef getPassName() const override { return FIXUPLEA_DESC; }

  FixupLEAPass() : MachineFunctionPass(ID) {}

  /// Loop over all of the basic blocks,
  /// replacing instructions by equivalent LEA instructions
  /// if needed and when possible.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  TargetSchedModel TSM;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};
} // end anonymous namespace

char FixupLEAPass::ID = 0;

INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI) const {
  MachineInstr &MI = *MBBI;
  switch (MI.getOpcode()) {
  case X86::MOV32rr:
  case X86::MOV64rr: {
    const MachineOperand &Src = MI.getOperand(1);
    const MachineOperand &Dest = MI.getOperand(0);
    MachineInstr *NewMI =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
                                                        : X86::LEA64r))
            .add(Dest)
            .add(Src)
            .addImm(1)
            .addReg(0)
            .addImm(0)
            .addReg(0);
    return NewMI;
  }
  }

  if (!MI.isConvertibleTo3Addr())
    return nullptr;

  switch (MI.getOpcode()) {
  default:
    // Only convert instructions that we've verified are safe.
    return nullptr;
  case X86::ADD64ri32:
  case X86::ADD64ri8:
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8_DB:
  case X86::ADD32ri:
  case X86::ADD32ri8:
  case X86::ADD32ri_DB:
  case X86::ADD32ri8_DB:
    if (!MI.getOperand(2).isImm()) {
      // convertToThreeAddress will call getImm()
      // which requires isImm() to be true
      return nullptr;
    }
    break;
  case X86::SHL64ri:
  case X86::SHL32ri:
  case X86::INC64r:
  case X86::INC32r:
  case X86::DEC64r:
  case X86::DEC32r:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
    // These instructions are all fine to convert.
    break;
  }
  return TII->convertToThreeAddress(MI, nullptr, nullptr);
}
FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }

static bool isLEA(unsigned Opcode) {
  return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
         Opcode == X86::LEA64_32r;
}

bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  bool IsSlowLEA = ST.slowLEA();
  bool IsSlow3OpsLEA = ST.slow3OpsLEA();
  bool LEAUsesAG = ST.LEAusesAG();
  bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
  bool UseLEAForSP = ST.useLeaForSP();

  TSM.init(&ST);
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI = (PSI && PSI->hasProfileSummary())
                   ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                   : nullptr;

  LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
  for (MachineBasicBlock &MBB : MF) {
    // First pass. Try to remove or optimize existing LEAs.
    bool OptIncDecPerBB =
        OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
      if (!isLEA(I->getOpcode()))
        continue;

      if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
        continue;

      if (IsSlowLEA)
        processInstructionForSlowLEA(I, MBB);
      else if (IsSlow3OpsLEA)
        processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
    }

    // Second pass for creating LEAs. This may reverse some of the
    // transformations above.
    if (LEAUsesAG) {
      for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
        processInstruction(I, MBB);
    }
  }

  LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);

  return true;
}
FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
  RegUsageState RegUsage = RU_NotUsed;
  MachineInstr &MI = *I;

  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isReg() && MO.getReg() == p.getReg()) {
      if (MO.isDef())
        return RU_Write;
      RegUsage = RU_Read;
    }
  }
  return RegUsage;
}

/// getPreviousInstr - Given a reference to an instruction in a basic
/// block, return a reference to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
                                    MachineBasicBlock &MBB) {
  if (I == MBB.begin()) {
    if (MBB.isPredecessor(&MBB)) {
      I = --MBB.end();
      return true;
    } else
      return false;
  }
  --I;
  return true;
}
MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
                              MachineBasicBlock &MBB) {
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst;
  static const int INSTR_DISTANCE_THRESHOLD = 5;

  CurInst = I;
  bool Found = getPreviousInstr(CurInst, MBB);
  while (Found && I != CurInst) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
      break; // too far back to make a difference
    if (usesRegister(p, CurInst) == RU_Write) {
      return CurInst;
    }
    InstrDistance += TSM.computeInstrLatency(&*CurInst);
    Found = getPreviousInstr(CurInst, MBB);
  }
  return MachineBasicBlock::iterator();
}
static inline bool isInefficientLEAReg(unsigned Reg) {
  return Reg == X86::EBP || Reg == X86::RBP ||
         Reg == X86::R13D || Reg == X86::R13;
}

/// Returns true if this LEA uses base and index registers, and the base
/// register is known to be inefficient for the subtarget.
// TODO: use a variant scheduling class to model the latency profile
// of LEA instructions, and implement this logic as a scheduling predicate.
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
                                            const MachineOperand &Index) {
  return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
         Index.getReg() != X86::NoRegister;
}

static inline bool hasLEAOffset(const MachineOperand &Offset) {
  return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
}

static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::ADD32rr;
  case X86::LEA64r:
    return X86::ADD64rr;
  }
}

static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return X86::SUB32rr;
  case X86::LEA64r:
    return X86::SUB64rr;
  }
}

static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
                                       const MachineOperand &Offset) {
  bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
  case X86::LEA64r:
    return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
  }
}

static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
  switch (LEAOpcode) {
  default:
    llvm_unreachable("Unexpected LEA instruction");
  case X86::LEA32r:
  case X86::LEA64_32r:
    return IsINC ? X86::INC32r : X86::DEC32r;
  case X86::LEA64r:
    return IsINC ? X86::INC64r : X86::DEC64r;
  }
}
MachineBasicBlock::iterator
FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
                            MachineBasicBlock &MBB) const {
  const int InstrDistanceThreshold = 5;
  int InstrDistance = 1;
  MachineBasicBlock::iterator CurInst = std::next(I);

  unsigned LEAOpcode = I->getOpcode();
  unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
  unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
  Register DestReg = I->getOperand(0).getReg();

  while (CurInst != MBB.end()) {
    if (CurInst->isCall() || CurInst->isInlineAsm())
      break;
    if (InstrDistance > InstrDistanceThreshold)
      break;

    // Check if the lea dest register is used in an add/sub instruction only.
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (Opnd.isReg()) {
        if (Opnd.getReg() == DestReg) {
          if (Opnd.isDef() || !Opnd.isKill())
            return MachineBasicBlock::iterator();

          unsigned AluOpcode = CurInst->getOpcode();
          if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
            return MachineBasicBlock::iterator();

          MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
          MachineOperand AluDest = CurInst->getOperand(0);
          if (Opnd2.getReg() != AluDest.getReg())
            return MachineBasicBlock::iterator();

          // X - (Y + Z) may generate different flags than (X - Y) - Z when
          // there is overflow. So we can't change the alu instruction if the
          // flags register is live.
          if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
            return MachineBasicBlock::iterator();

          return CurInst;
        }
        if (TRI->regsOverlap(DestReg, Opnd.getReg()))
          return MachineBasicBlock::iterator();
      }
    }

    InstrDistance++;
    ++CurInst;
  }
  return MachineBasicBlock::iterator();
}
void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
                                 MachineBasicBlock::iterator &AluI,
                                 bool &BaseIndexDef, bool &AluDestRef,
                                 MachineOperand **KilledBase,
                                 MachineOperand **KilledIndex) const {
  BaseIndexDef = AluDestRef = false;
  *KilledBase = *KilledIndex = nullptr;
  Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
  Register AluDestReg = AluI->getOperand(0).getReg();

  MachineBasicBlock::iterator CurInst = std::next(LeaI);
  while (CurInst != AluI) {
    for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
      MachineOperand &Opnd = CurInst->getOperand(I);
      if (!Opnd.isReg())
        continue;
      Register Reg = Opnd.getReg();
      if (TRI->regsOverlap(Reg, AluDestReg))
        AluDestRef = true;
      if (TRI->regsOverlap(Reg, BaseReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledBase = &Opnd;
      }
      if (TRI->regsOverlap(Reg, IndexReg)) {
        if (Opnd.isDef())
          BaseIndexDef = true;
        else if (Opnd.isKill())
          *KilledIndex = &Opnd;
      }
    }
    ++CurInst;
  }
}
bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
                             MachineBasicBlock &MBB) const {
  // Look for an add/sub instruction which uses the result of lea.
  MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
  if (AluI == MachineBasicBlock::iterator())
    return false;

  // Check for any related register usage between the lea and the alu.
  bool BaseIndexDef, AluDestRef;
  MachineOperand *KilledBase, *KilledIndex;
  checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);

  MachineBasicBlock::iterator InsertPos = AluI;
  if (BaseIndexDef) {
    if (AluDestRef)
      return false;
    InsertPos = I;
    KilledBase = KilledIndex = nullptr;
  }

  // Check if any of the registers are the same.
  Register AluDestReg = AluI->getOperand(0).getReg();
  Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
  Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
  if (I->getOpcode() == X86::LEA64_32r) {
    BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }
  if (AluDestReg == IndexReg) {
    if (BaseReg == IndexReg)
      return false;
    std::swap(BaseReg, IndexReg);
    std::swap(KilledBase, KilledIndex);
  }
  if (BaseReg == IndexReg)
    KilledBase = nullptr;

  // Now it's safe to change instructions.
  MachineInstr *NewMI1, *NewMI2;
  unsigned NewOpcode = AluI->getOpcode();
  NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
  NewMI1->addRegisterDead(X86::EFLAGS, TRI);
  NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
                   AluDestReg)
               .addReg(AluDestReg, RegState::Kill)
               .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
  NewMI2->addRegisterDead(X86::EFLAGS, TRI);

  // Clear the old Kill flags.
  if (KilledBase)
    KilledBase->setIsKill(false);
  if (KilledIndex)
    KilledIndex->setIsKill(false);

  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
  MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
  MBB.erase(I);
  MBB.erase(AluI);
  I = NewMI1;
  return true;
}
bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
                                 MachineBasicBlock &MBB, bool OptIncDec,
                                 bool UseLEAForSP) const {
  MachineInstr &MI = *I;

  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
          MachineBasicBlock::LQR_Dead)
    return false;

  Register DestReg = MI.getOperand(0).getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  // Don't change stack adjustment LEAs.
  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
    return false;

  // LEA64_32 has 64-bit operands but 32-bit result.
  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  MachineInstr *NewMI = nullptr;

  // Case 1.
  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
  // which can be turned into add %reg2, %reg1
  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
      (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (DestReg == BaseReg && IndexReg == 0) {
    // Case 2.
    // This is an LEA with only a base register and a displacement,
    // so we can use ADDri or INC/DEC.

    // Does this LEA have one of these forms:
    // lea  %reg, 1(%reg)
    // lea  %reg, -1(%reg)
    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
      bool IsINC = Disp.getImm() == 1;
      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg);
      }
    } else {
      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
      if (MI.getOpcode() == X86::LEA64_32r) {
        // TODO: Do we need the super register implicit use?
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm())
                    .addReg(Base.getReg(), RegState::Implicit);
      } else {
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
                    .addReg(BaseReg)
                    .addImm(Disp.getImm());
      }
    }
  } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
    // Case 3.
    // Look for and transform the sequence
    //     lea (reg1, reg2), reg3
    //     sub reg3, reg4
    return optLEAALU(I, MBB);
  } else
    return false;

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
  return true;
}
void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
                                      MachineBasicBlock &MBB) {
  // Process a load, store, or LEA instruction.
  MachineInstr &MI = *I;
  const MCInstrDesc &Desc = MI.getDesc();
  int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (AddrOffset >= 0) {
    AddrOffset += X86II::getOperandBias(Desc);
    MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
    if (p.isReg() && p.getReg() != X86::ESP) {
      seekLEAFixup(p, I, MBB);
    }
    MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
    if (q.isReg() && q.getReg() != X86::ESP) {
      seekLEAFixup(q, I, MBB);
    }
  }
}

void FixupLEAPass::seekLEAFixup(MachineOperand &p,
                                MachineBasicBlock::iterator &I,
                                MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
  if (MBI != MachineBasicBlock::iterator()) {
    MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
    if (NewMI) {
      ++NumLEAs;
      LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
      // now to replace with an equivalent LEA...
      LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
      MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
      MBB.erase(MBI);
      MachineBasicBlock::iterator J =
          static_cast<MachineBasicBlock::iterator>(NewMI);
      processInstruction(J, MBB);
    }
  }
}
void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
                                                MachineBasicBlock &MBB) {
  MachineInstr &MI = *I;
  const unsigned Opcode = MI.getOpcode();

  const MachineOperand &Dst = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (Segment.getReg() != 0 || !Offset.isImm() ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead)
    return;
  const Register DstR = Dst.getReg();
  const Register SrcR1 = Base.getReg();
  const Register SrcR2 = Index.getReg();
  if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    return;
  if (Scale.getImm() > 1)
    return;
  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
  MachineInstr *NewMI = nullptr;
  // Make ADD instruction for two registers writing to LEA's destination
  if (SrcR1 != 0 && SrcR2 != 0) {
    const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
    const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
    NewMI =
        BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
  }
  // Make ADD instruction for immediate
  if (Offset.getImm() != 0) {
    const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(Opcode, Offset));
    const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
    NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
                .add(SrcR)
                .addImm(Offset.getImm());
    LLVM_DEBUG(NewMI->dump(););
  }
  if (NewMI) {
    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
  }
}
void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
                                             MachineBasicBlock &MBB,
                                             bool OptIncDec) {
  MachineInstr &MI = *I;
  const unsigned LEAOpcode = MI.getOpcode();

  const MachineOperand &Dest = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
      MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
          MachineBasicBlock::LQR_Dead ||
      Segment.getReg() != X86::NoRegister)
    return;

  Register DestReg = Dest.getReg();
  Register BaseReg = Base.getReg();
  Register IndexReg = Index.getReg();

  if (MI.getOpcode() == X86::LEA64_32r) {
    if (BaseReg != 0)
      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    if (IndexReg != 0)
      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
  }

  bool IsScale1 = Scale.getImm() == 1;
  bool IsInefficientBase = isInefficientLEAReg(BaseReg);
  bool IsInefficientIndex = isInefficientLEAReg(IndexReg);

  // Skip these cases since it takes more than 2 instructions
  // to replace the LEA instruction.
  if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
    return;

  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

  MachineInstr *NewMI = nullptr;

  // First try to replace LEA with one or two (for the 3-op LEA case)
  // add instructions:
  // 1.lea (%base,%index,1), %base => add %index,%base
  // 2.lea (%base,%index,1), %index => add %base,%index
  if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    if (DestReg != BaseReg)
      std::swap(BaseReg, IndexReg);

    if (MI.getOpcode() == X86::LEA64_32r) {
      // TODO: Do we need the super register implicit use?
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg)
                  .addReg(Base.getReg(), RegState::Implicit)
                  .addReg(Index.getReg(), RegState::Implicit);
    } else {
      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                  .addReg(BaseReg)
                  .addReg(IndexReg);
    }
  } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    // If the base is inefficient try switching the index and base operands,
    // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
    // lea offset(%base,%index,scale),%dst =>
    // lea (%base,%index,scale); add offset,%dst
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
                .add(Dest)
                .add(IsInefficientBase ? Index : Base)
                .add(Scale)
                .add(IsInefficientBase ? Base : Index)
                .addImm(0)
                .add(Segment);
    LLVM_DEBUG(NewMI->dump(););
  }

  // If either replacement succeeded above, add the offset if needed, then
  // replace the instruction.
  if (NewMI) {
    // Create ADD instruction for the Offset in case of 3-Ops LEA.
    if (hasLEAOffset(Offset)) {
      if (OptIncDec && Offset.isImm() &&
          (Offset.getImm() == 1 || Offset.getImm() == -1)) {
        unsigned NewOpc =
            getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg);
        LLVM_DEBUG(NewMI->dump(););
      } else {
        unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                    .addReg(DestReg)
                    .add(Offset);
        LLVM_DEBUG(NewMI->dump(););
      }
    }

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // Handle the rest of the cases with inefficient base register:
  assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
  assert(IsInefficientBase && "efficient base should be handled already!");

  // FIXME: Handle LEA64_32r.
  if (LEAOpcode == X86::LEA64_32r)
    return;

  // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
  if (IsScale1 && !hasLEAOffset(Offset)) {
    bool BIK = Base.isKill() && BaseReg != IndexReg;
    TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
    LLVM_DEBUG(MI.getPrevNode()->dump(););

    unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
                .addReg(DestReg)
                .add(Index);
    LLVM_DEBUG(NewMI->dump(););

    MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    MBB.erase(I);
    I = NewMI;
    return;
  }

  // lea offset(%base,%index,scale), %dst =>
  // lea offset( ,%index,scale), %dst; add %base,%dst
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
              .add(Dest)
              .addReg(0)
              .add(Scale)
              .add(Index)
              .add(Offset)
              .add(Segment);
  LLVM_DEBUG(NewMI->dump(););

  unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
  NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
              .addReg(DestReg)
              .add(Base);
  LLVM_DEBUG(NewMI->dump(););

  MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
  MBB.erase(I);
  I = NewMI;
}