//===- X86OptimizeLEAs.cpp - optimize usage of LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that performs some optimizations with LEA
// instructions in order to improve performance and code size.
// Currently, it does two things:
// 1) If there are two LEA instructions calculating addresses which only differ
//    by displacement inside a basic block, one of them is removed.
// 2) Address calculations in load and store instructions are replaced by
//    existing LEA def registers where possible.
//
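// As an illustrative sketch (virtual register names are made up), 1) turns
//   %1 = LEA64r %0, 1, $noreg, 16, $noreg
//   %2 = LEA64r %0, 1, $noreg, 24, $noreg
// into a single LEA: uses of %2 are rewritten to use %1 with their
// displacements adjusted by 8, and the second LEA is erased.
//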
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "x86-optimize-LEAs"

static cl::opt<bool>
    DisableX86LEAOpt("disable-x86-lea-opt", cl::Hidden,
                     cl::desc("X86: Disable LEA optimizations."),
                     cl::init(false));
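
// The flag above is an ordinary command-line option, so the pass can be turned
// off with, e.g., `llc -disable-x86-lea-opt`, which is useful when bisecting a
// codegen problem down to this pass.
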
STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");

/// Returns true if two machine operands are identical and they are not
/// physical registers.
static inline bool isIdenticalOp(const MachineOperand &MO1,
                                 const MachineOperand &MO2);

/// Returns true if two address displacement operands are of the same
/// type and use the same symbol/index/address regardless of the offset.
static bool isSimilarDispOp(const MachineOperand &MO1,
                            const MachineOperand &MO2);

/// Returns true if the instruction is LEA.
static inline bool isLEA(const MachineInstr &MI);

namespace {

/// A key based on instruction's memory operands.
class MemOpKey {
public:
  MemOpKey(const MachineOperand *Base, const MachineOperand *Scale,
           const MachineOperand *Index, const MachineOperand *Segment,
           const MachineOperand *Disp)
      : Disp(Disp) {
    Operands[0] = Base;
    Operands[1] = Scale;
    Operands[2] = Index;
    Operands[3] = Segment;
  }

  bool operator==(const MemOpKey &Other) const {
    // Addresses' bases, scales, indices and segments must be identical.
    for (int i = 0; i < 4; ++i)
      if (!isIdenticalOp(*Operands[i], *Other.Operands[i]))
        return false;

    // Addresses' displacements don't have to be exactly the same. It only
    // matters that they use the same symbol/index/address. Immediates' or
    // offsets' differences will be taken care of during instruction
    // substitution.
    return isSimilarDispOp(*Disp, *Other.Disp);
  }

  // Address' base, scale, index and segment operands.
  const MachineOperand *Operands[4];

  // Address' displacement operand.
  const MachineOperand *Disp;
};

} // end anonymous namespace

namespace llvm {

/// Provide DenseMapInfo for MemOpKey.
template <> struct DenseMapInfo<MemOpKey> {
  using PtrInfo = DenseMapInfo<const MachineOperand *>;

  static inline MemOpKey getEmptyKey() {
    return MemOpKey(PtrInfo::getEmptyKey(), PtrInfo::getEmptyKey(),
                    PtrInfo::getEmptyKey(), PtrInfo::getEmptyKey(),
                    PtrInfo::getEmptyKey());
  }

  static inline MemOpKey getTombstoneKey() {
    return MemOpKey(PtrInfo::getTombstoneKey(), PtrInfo::getTombstoneKey(),
                    PtrInfo::getTombstoneKey(), PtrInfo::getTombstoneKey(),
                    PtrInfo::getTombstoneKey());
  }

  static unsigned getHashValue(const MemOpKey &Val) {
    // Checking any field of MemOpKey is enough to determine if the key is
    // empty or tombstone.
    assert(Val.Disp != PtrInfo::getEmptyKey() && "Cannot hash the empty key");
    assert(Val.Disp != PtrInfo::getTombstoneKey() &&
           "Cannot hash the tombstone key");

    hash_code Hash = hash_combine(*Val.Operands[0], *Val.Operands[1],
                                  *Val.Operands[2], *Val.Operands[3]);

    // If the address displacement is an immediate, it should not affect the
    // hash so that memory operands which differ only by the immediate
    // displacement get the same hash. If the address displacement is something
    // else, we should reflect the symbol/index/address in the hash.
    switch (Val.Disp->getType()) {
    case MachineOperand::MO_Immediate:
      break;
    case MachineOperand::MO_ConstantPoolIndex:
    case MachineOperand::MO_JumpTableIndex:
      Hash = hash_combine(Hash, Val.Disp->getIndex());
      break;
    case MachineOperand::MO_ExternalSymbol:
      Hash = hash_combine(Hash, Val.Disp->getSymbolName());
      break;
    case MachineOperand::MO_GlobalAddress:
      Hash = hash_combine(Hash, Val.Disp->getGlobal());
      break;
    case MachineOperand::MO_BlockAddress:
      Hash = hash_combine(Hash, Val.Disp->getBlockAddress());
      break;
    case MachineOperand::MO_MCSymbol:
      Hash = hash_combine(Hash, Val.Disp->getMCSymbol());
      break;
    case MachineOperand::MO_MachineBasicBlock:
      Hash = hash_combine(Hash, Val.Disp->getMBB());
      break;
    default:
      llvm_unreachable("Invalid address displacement operand");
    }

    return (unsigned)Hash;
  }

  static bool isEqual(const MemOpKey &LHS, const MemOpKey &RHS) {
    // Checking any field of MemOpKey is enough to determine if the key is
    // empty or tombstone.
    if (RHS.Disp == PtrInfo::getEmptyKey())
      return LHS.Disp == PtrInfo::getEmptyKey();
    if (RHS.Disp == PtrInfo::getTombstoneKey())
      return LHS.Disp == PtrInfo::getTombstoneKey();
    return LHS == RHS;
  }
};

} // end namespace llvm
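
// An x86 memory reference is described by five consecutive machine operands:
// base register, scale amount, index register, displacement and segment
// register. X86::AddrBaseReg, X86::AddrScaleAmt, X86::AddrIndexReg,
// X86::AddrDisp and X86::AddrSegmentReg (defined in X86BaseInfo.h) are the
// offsets of those operands within the group, which is what getMemOpKey below
// relies on.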

/// Returns a hash table key based on memory operands of \p MI. The
/// number of the first memory operand of \p MI is specified through \p N.
static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) {
  assert((isLEA(MI) || MI.mayLoadOrStore()) &&
         "The instruction must be a LEA, a load or a store");
  return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg),
                  &MI.getOperand(N + X86::AddrScaleAmt),
                  &MI.getOperand(N + X86::AddrIndexReg),
                  &MI.getOperand(N + X86::AddrSegmentReg),
                  &MI.getOperand(N + X86::AddrDisp));
}

static inline bool isIdenticalOp(const MachineOperand &MO1,
                                 const MachineOperand &MO2) {
  return MO1.isIdenticalTo(MO2) &&
         (!MO1.isReg() || !Register::isPhysicalRegister(MO1.getReg()));
}

#ifndef NDEBUG
static bool isValidDispOp(const MachineOperand &MO) {
  return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() ||
         MO.isGlobal() || MO.isBlockAddress() || MO.isMCSymbol() || MO.isMBB();
}
#endif

static bool isSimilarDispOp(const MachineOperand &MO1,
                            const MachineOperand &MO2) {
  assert(isValidDispOp(MO1) && isValidDispOp(MO2) &&
         "Address displacement operand is not valid");
  return (MO1.isImm() && MO2.isImm()) ||
         (MO1.isCPI() && MO2.isCPI() && MO1.getIndex() == MO2.getIndex()) ||
         (MO1.isJTI() && MO2.isJTI() && MO1.getIndex() == MO2.getIndex()) ||
         (MO1.isSymbol() && MO2.isSymbol() &&
          MO1.getSymbolName() == MO2.getSymbolName()) ||
         (MO1.isGlobal() && MO2.isGlobal() &&
          MO1.getGlobal() == MO2.getGlobal()) ||
         (MO1.isBlockAddress() && MO2.isBlockAddress() &&
          MO1.getBlockAddress() == MO2.getBlockAddress()) ||
         (MO1.isMCSymbol() && MO2.isMCSymbol() &&
          MO1.getMCSymbol() == MO2.getMCSymbol()) ||
         (MO1.isMBB() && MO2.isMBB() && MO1.getMBB() == MO2.getMBB());
}
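
// LEA ("load effective address") materializes base + index * scale + disp into
// a register without touching memory; e.g. `leaq 16(%rdi,%rsi,4), %rax`
// computes %rdi + %rsi * 4 + 16.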
static inline bool isLEA(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
         Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
}

namespace {

class X86OptimizeLEAPass : public MachineFunctionPass {
public:
  X86OptimizeLEAPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 LEA Optimize"; }

  /// Loop over all of the basic blocks, replacing address
  /// calculations in load and store instructions, if it's already
  /// been calculated by LEA. Also, remove redundant LEAs.
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
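
  // Profile information is requested so that removeRedundantAddrCalc can be
  // restricted to blocks that are optimized for size (see runOnMachineFunction).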
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>;

  /// Returns the distance between two instructions inside one basic block.
  /// A negative result means that the instructions occur in reverse order.
  int calcInstrDist(const MachineInstr &First, const MachineInstr &Last);

  /// Choose the best \p LEA instruction from the \p List to replace
  /// address calculation in \p MI instruction. Return the address displacement
  /// and the distance between \p MI and the chosen \p BestLEA in
  /// \p AddrDispShift and \p Dist.
  bool chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
                     const MachineInstr &MI, MachineInstr *&BestLEA,
                     int64_t &AddrDispShift, int &Dist);

  /// Returns the difference between addresses' displacements of \p MI1
  /// and \p MI2. The numbers of the first memory operands for the instructions
  /// are specified through \p N1 and \p N2.
  int64_t getAddrDispShift(const MachineInstr &MI1, unsigned N1,
                           const MachineInstr &MI2, unsigned N2) const;

  /// Returns true if the \p Last LEA instruction can be replaced by the
  /// \p First. The difference between displacements of the addresses calculated
  /// by these LEAs is returned in \p AddrDispShift. It'll be used for proper
  /// replacement of the \p Last LEA's uses with the \p First's def register.
  bool isReplaceable(const MachineInstr &First, const MachineInstr &Last,
                     int64_t &AddrDispShift) const;

  /// Find all LEA instructions in the basic block. Also, assign position
  /// numbers to all instructions in the basic block to speed up calculation of
  /// distance between them.
  void findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs);

  /// Removes redundant address calculations.
  bool removeRedundantAddrCalc(MemOpMap &LEAs);

  /// Replace debug value MI with a new debug value instruction using register
  /// VReg with an appropriate offset and DIExpression to incorporate the
  /// address displacement AddrDispShift. Return new debug value instruction.
  MachineInstr *replaceDebugValue(MachineInstr &MI, unsigned OldReg,
                                  unsigned NewReg, int64_t AddrDispShift);

  /// Removes LEAs which calculate similar addresses.
  bool removeRedundantLEAs(MemOpMap &LEAs);

  DenseMap<const MachineInstr *, unsigned> InstrPos;

  MachineRegisterInfo *MRI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};

} // end anonymous namespace

char X86OptimizeLEAPass::ID = 0;

FunctionPass *llvm::createX86OptimizeLEAs() { return new X86OptimizeLEAPass(); }

INITIALIZE_PASS(X86OptimizeLEAPass, DEBUG_TYPE, "X86 optimize LEA pass", false,
                false)
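
// createX86OptimizeLEAs() is the factory the X86 target uses to add this pass
// to its codegen pipeline. The pass works on virtual registers (isIdenticalOp
// above deliberately rejects physical ones), so it is meant to run before
// register allocation.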

int X86OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
                                      const MachineInstr &Last) {
  // Both instructions must be in the same basic block and they must be
  // present in InstrPos.
  assert(Last.getParent() == First.getParent() &&
         "Instructions are in different basic blocks");
  assert(InstrPos.find(&First) != InstrPos.end() &&
         InstrPos.find(&Last) != InstrPos.end() &&
         "Instructions' positions are undefined");

  return InstrPos[&Last] - InstrPos[&First];
}

// Find the best LEA instruction in the List to replace address recalculation in
// MI. Such LEA must meet these requirements:
// 1) The address calculated by the LEA differs only by the displacement from
//    the address used in MI.
// 2) The register class of the definition of the LEA is compatible with the
//    register class of the address base register of MI.
// 3) Displacement of the new memory operand should fit in 1 byte if possible.
// 4) The LEA should be as close to MI as possible, and prior to it if
//    possible.
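//
// Regarding 3): a displacement that fits in a signed byte is encoded as disp8
// rather than disp32 in the memory operand, saving three bytes per rewritten
// access.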
bool X86OptimizeLEAPass::chooseBestLEA(
    const SmallVectorImpl<MachineInstr *> &List, const MachineInstr &MI,
    MachineInstr *&BestLEA, int64_t &AddrDispShift, int &Dist) {
  const MachineFunction *MF = MI.getParent()->getParent();
  const MCInstrDesc &Desc = MI.getDesc();
  int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags) +
                X86II::getOperandBias(Desc);

  BestLEA = nullptr;

  // Loop over all LEA instructions.
  for (auto DefMI : List) {
    // Get new address displacement.
    int64_t AddrDispShiftTemp = getAddrDispShift(MI, MemOpNo, *DefMI, 1);

    // Make sure address displacement fits 4 bytes.
    if (!isInt<32>(AddrDispShiftTemp))
      continue;

    // Check that LEA def register can be used as MI address base. Some
    // instructions can use a limited set of registers as address base, for
    // example MOV8mr_NOREX. We could constrain the register class of the LEA
    // def to suit MI, however since this case is very rare and hard to
    // reproduce in a test it's just more reliable to skip the LEA.
    if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) !=
        MRI->getRegClass(DefMI->getOperand(0).getReg()))
      continue;

    // Choose the closest LEA instruction from the list, prior to MI if
    // possible. Note that we take the resulting address displacement into
    // account as well. Also note that the list is sorted by the order in which
    // the LEAs occur, so the break condition is pretty simple.
    int DistTemp = calcInstrDist(*DefMI, MI);
    assert(DistTemp != 0 &&
           "The distance between two different instructions cannot be zero");
    if (DistTemp > 0 || BestLEA == nullptr) {
      // Do not update the returned LEA if the current one provides a
      // displacement which fits in 1 byte, while the new candidate does not.
      if (BestLEA != nullptr && !isInt<8>(AddrDispShiftTemp) &&
          isInt<8>(AddrDispShift))
        continue;

      BestLEA = DefMI;
      AddrDispShift = AddrDispShiftTemp;
      Dist = DistTemp;
    }

    // FIXME: Maybe we should not always stop at the first LEA after MI.
    if (DistTemp < 0)
      break;
  }

  return BestLEA != nullptr;
}

// Get the difference between the addresses' displacements of the two
// instructions \p MI1 and \p MI2. The numbers of the first memory operands are
// passed through \p N1 and \p N2.
int64_t X86OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1,
                                             unsigned N1,
                                             const MachineInstr &MI2,
                                             unsigned N2) const {
  const MachineOperand &Op1 = MI1.getOperand(N1 + X86::AddrDisp);
  const MachineOperand &Op2 = MI2.getOperand(N2 + X86::AddrDisp);

  assert(isSimilarDispOp(Op1, Op2) &&
         "Address displacement operands are not compatible");

  // After the assert above we can be sure that both operands are of the same
  // valid type and use the same symbol/index/address, thus displacement shift
  // calculation is rather simple.
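  // Jump-table operands were already required by isSimilarDispOp to have equal
  // indices, so the displacement shift between two of them is zero.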
  if (Op1.isJTI())
    return 0;
  return Op1.isImm() ? Op1.getImm() - Op2.getImm()
                     : Op1.getOffset() - Op2.getOffset();
}

// Check that the Last LEA can be replaced by the First LEA. To be so,
// these requirements must be met:
// 1) Addresses calculated by LEAs differ only by displacement.
// 2) Def registers of LEAs belong to the same class.
// 3) All uses of the Last LEA def register are replaceable, thus the
//    register is used only as address base.
bool X86OptimizeLEAPass::isReplaceable(const MachineInstr &First,
                                       const MachineInstr &Last,
                                       int64_t &AddrDispShift) const {
  assert(isLEA(First) && isLEA(Last) &&
         "The function works only with LEA instructions");

  // Make sure that LEA def registers belong to the same class. There may be
  // instructions (like MOV8mr_NOREX) which allow a limited set of registers to
  // be used as their operands, so we must be sure that replacing one LEA
  // with another won't lead to putting a wrong register in the instruction.
  if (MRI->getRegClass(First.getOperand(0).getReg()) !=
      MRI->getRegClass(Last.getOperand(0).getReg()))
    return false;

  // Get new address displacement.
  AddrDispShift = getAddrDispShift(Last, 1, First, 1);

  // Loop over all uses of the Last LEA to check that its def register is
  // used only as an address base for memory accesses. If so, the LEA can be
  // replaced; otherwise it cannot.
  for (auto &MO : MRI->use_nodbg_operands(Last.getOperand(0).getReg())) {
    MachineInstr &MI = *MO.getParent();

    // Get the number of the first memory operand.
    const MCInstrDesc &Desc = MI.getDesc();
    int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags);

    // If the use instruction has no memory operand - the LEA is not
    // replaceable.
    if (MemOpNo < 0)
      return false;

    MemOpNo += X86II::getOperandBias(Desc);

    // If the address base of the use instruction is not the LEA def register -
    // the LEA is not replaceable.
    if (!isIdenticalOp(MI.getOperand(MemOpNo + X86::AddrBaseReg), MO))
      return false;

    // If the LEA def register is used as any other operand of the use
    // instruction - the LEA is not replaceable.
    for (unsigned i = 0; i < MI.getNumOperands(); i++)
      if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) &&
          isIdenticalOp(MI.getOperand(i), MO))
        return false;

    // Check that the new address displacement will fit 4 bytes.
    if (MI.getOperand(MemOpNo + X86::AddrDisp).isImm() &&
        !isInt<32>(MI.getOperand(MemOpNo + X86::AddrDisp).getImm() +
                   AddrDispShift))
      return false;
  }

  return true;
}

void X86OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB,
                                  MemOpMap &LEAs) {
  unsigned Pos = 0;
  for (auto &MI : MBB) {
    // Assign the position number to the instruction. Note that we are going to
    // move some instructions during the optimization; however, there will never
    // be a need to move two instructions before any selected instruction. So to
    // avoid multiple position updates during moves we just increase the position
    // counter by two, leaving free space for instructions which will be moved.
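    // Positions are therefore 2, 4, 6, ...; a hoisted LEA inserted right before
    // an instruction at position P gets position P - 1 (see
    // removeRedundantAddrCalc).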
    InstrPos[&MI] = Pos += 2;

    if (isLEA(MI))
      LEAs[getMemOpKey(MI, 1)].push_back(const_cast<MachineInstr *>(&MI));
  }
}

// Try to find load and store instructions which recalculate addresses already
// calculated by some LEA and replace their memory operands with its def
// register.
bool X86OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
  bool Changed = false;

  assert(!LEAs.empty());
  MachineBasicBlock *MBB = (*LEAs.begin()->second.begin())->getParent();

  // Process all instructions in basic block.
  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
    // Instruction must be load or store.
    if (!MI.mayLoadOrStore())
      continue;

    // Get the number of the first memory operand.
    const MCInstrDesc &Desc = MI.getDesc();
    int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags);

    // If instruction has no memory operand - skip it.
    if (MemOpNo < 0)
      continue;

    MemOpNo += X86II::getOperandBias(Desc);

    // Do not call chooseBestLEA if there was no matching LEA
    auto Insns = LEAs.find(getMemOpKey(MI, MemOpNo));
    if (Insns == LEAs.end())
      continue;

    // Get the best LEA instruction to replace address calculation.
    MachineInstr *DefMI;
    int64_t AddrDispShift;
    int Dist;
    if (!chooseBestLEA(Insns->second, MI, DefMI, AddrDispShift, Dist))
      continue;

    // If the LEA occurs before the current instruction, we can freely replace
    // the instruction. If the LEA occurs after, we can hoist the LEA above the
    // instruction and then perform the replacement. Since the LEA and the
    // instruction have similar memory operands (and thus the same def
    // instructions for those operands), we can always do that without
    // worrying about using registers before their defs.
    if (Dist < 0) {
      DefMI->removeFromParent();
      MBB->insert(MachineBasicBlock::iterator(&MI), DefMI);
      InstrPos[DefMI] = InstrPos[&MI] - 1;

      // Make sure the instructions' position numbers are sane.
      assert(((InstrPos[DefMI] == 1 &&
               MachineBasicBlock::iterator(DefMI) == MBB->begin()) ||
              InstrPos[DefMI] >
                  InstrPos[&*std::prev(MachineBasicBlock::iterator(DefMI))]) &&
             "Instruction positioning is broken");
    }

    // Since we can possibly extend register lifetime, clear kill flags.
    MRI->clearKillFlags(DefMI->getOperand(0).getReg());

    ++NumSubstLEAs;
    LLVM_DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump(););

    // Change instruction operands.
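    // The access becomes [DefReg + AddrDispShift]: base = the LEA def register,
    // scale = 1, no index and no segment register.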
    MI.getOperand(MemOpNo + X86::AddrBaseReg)
        .ChangeToRegister(DefMI->getOperand(0).getReg(), false);
    MI.getOperand(MemOpNo + X86::AddrScaleAmt).ChangeToImmediate(1);
    MI.getOperand(MemOpNo + X86::AddrIndexReg)
        .ChangeToRegister(X86::NoRegister, false);
    MI.getOperand(MemOpNo + X86::AddrDisp).ChangeToImmediate(AddrDispShift);
    MI.getOperand(MemOpNo + X86::AddrSegmentReg)
        .ChangeToRegister(X86::NoRegister, false);

    LLVM_DEBUG(dbgs() << "OptimizeLEAs: Replaced by: "; MI.dump(););

    Changed = true;
  }

  return Changed;
}

MachineInstr *X86OptimizeLEAPass::replaceDebugValue(MachineInstr &MI,
                                                    unsigned OldReg,
                                                    unsigned NewReg,
                                                    int64_t AddrDispShift) {
  const DIExpression *Expr = MI.getDebugExpression();
  if (AddrDispShift != 0) {
    if (MI.isNonListDebugValue()) {
      Expr =
          DIExpression::prepend(Expr, DIExpression::StackValue, AddrDispShift);
    } else {
      // Update the Expression, appending an offset of `AddrDispShift` to the
      // Op corresponding to `OldReg`.
      SmallVector<uint64_t, 3> Ops;
      DIExpression::appendOffset(Ops, AddrDispShift);
      for (MachineOperand &Op : MI.getDebugOperandsForReg(OldReg)) {
        unsigned OpIdx = MI.getDebugOperandIndex(&Op);
        Expr = DIExpression::appendOpsToArg(Expr, Ops, OpIdx);
      }
    }
  }

  // Replace DBG_VALUE instruction with modified version.
  MachineBasicBlock *MBB = MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  bool IsIndirect = MI.isIndirectDebugValue();
  const MDNode *Var = MI.getDebugVariable();
  unsigned Opcode = MI.isNonListDebugValue() ? TargetOpcode::DBG_VALUE
                                             : TargetOpcode::DBG_VALUE_LIST;
  if (IsIndirect)
    assert(MI.getDebugOffset().getImm() == 0 &&
           "DBG_VALUE with nonzero offset");
  SmallVector<MachineOperand, 4> NewOps;
  // If we encounter an operand using the old register, replace it with an
  // operand that uses the new register; otherwise keep the old operand.
  auto replaceOldReg = [OldReg, NewReg](const MachineOperand &Op) {
    if (Op.isReg() && Op.getReg() == OldReg)
      return MachineOperand::CreateReg(NewReg, false, false, false, false,
                                       false, false, false, false, false,
                                       /*IsRenamable*/ true);
    return Op;
  };
  for (const MachineOperand &Op : MI.debug_operands())
    NewOps.push_back(replaceOldReg(Op));
  return BuildMI(*MBB, MBB->erase(&MI), DL, TII->get(Opcode), IsIndirect,
                 NewOps, Var, Expr);
}

// Try to find similar LEAs in the list and replace one with another.
bool X86OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
  bool Changed = false;

  // Loop over all entries in the table.
  for (auto &E : LEAs) {
    auto &List = E.second;

    // Loop over all LEA pairs.
    auto I1 = List.begin();
    while (I1 != List.end()) {
      MachineInstr &First = **I1;
      auto I2 = std::next(I1);
      while (I2 != List.end()) {
        MachineInstr &Last = **I2;
        int64_t AddrDispShift;

        // LEAs should be in occurrence order in the list, so we can freely
        // replace later LEAs with earlier ones.
        assert(calcInstrDist(First, Last) > 0 &&
               "LEAs must be in occurrence order in the list");

        // Check that the Last LEA instruction can be replaced by the First.
        if (!isReplaceable(First, Last, AddrDispShift)) {
          ++I2;
          continue;
        }

        // Loop over all uses of the Last LEA and update their operands. Note
        // that the correctness of this has already been checked in the
        // isReplaceable function.
        Register FirstVReg = First.getOperand(0).getReg();
        Register LastVReg = Last.getOperand(0).getReg();
        for (MachineOperand &MO :
             llvm::make_early_inc_range(MRI->use_operands(LastVReg))) {
          MachineInstr &MI = *MO.getParent();

          if (MI.isDebugValue()) {
            // Replace DBG_VALUE instruction with modified version using the
            // register from the replacing LEA and the address displacement
            // between the LEA instructions.
            replaceDebugValue(MI, LastVReg, FirstVReg, AddrDispShift);
            continue;
          }

          // Get the number of the first memory operand.
          const MCInstrDesc &Desc = MI.getDesc();
          int MemOpNo =
              X86II::getMemoryOperandNo(Desc.TSFlags) +
              X86II::getOperandBias(Desc);

          // Update address base.
          MO.setReg(FirstVReg);

          // Update address disp.
          MachineOperand &Op = MI.getOperand(MemOpNo + X86::AddrDisp);
          if (Op.isImm())
            Op.setImm(Op.getImm() + AddrDispShift);
          else if (!Op.isJTI())
            Op.setOffset(Op.getOffset() + AddrDispShift);
        }

        // Since we can possibly extend register lifetime, clear kill flags.
        MRI->clearKillFlags(FirstVReg);

        ++NumRedundantLEAs;
        LLVM_DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: ";
                   Last.dump(););

        // By this moment, all of the Last LEA's uses must be replaced. So we
        // can freely remove it.
        assert(MRI->use_empty(LastVReg) &&
               "The LEA's def register must have no uses");
        Last.eraseFromParent();

        // Erase removed LEA from the list.
        I2 = List.erase(I2);

        Changed = true;
      }
      ++I1;
    }
  }

  return Changed;
}

bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  if (DisableX86LEAOpt || skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
  TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI = (PSI && PSI->hasProfileSummary())
                   ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                   : nullptr;

  // Process all basic blocks.
  for (auto &MBB : MF) {
    MemOpMap LEAs;
    InstrPos.clear();

    // Find all LEA instructions in basic block.
    findLEAs(MBB, LEAs);

    // If current basic block has no LEAs, move on to the next one.
    if (LEAs.empty())
      continue;

    // Remove redundant LEA instructions.
    Changed |= removeRedundantLEAs(LEAs);

    // Remove redundant address calculations. Do it only for -Os/-Oz since only
    // a code size gain is expected from this part of the pass.
    bool OptForSize = MF.getFunction().hasOptSize() ||
                      llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
    if (OptForSize)
      Changed |= removeRedundantAddrCalc(LEAs);
  }

  return Changed;
}