//===- X86OptimizeLEAs.cpp - optimize usage of LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that performs some optimizations with LEA
// instructions in order to improve performance and code size.
// Currently, it does two things:
// 1) If there are two LEA instructions calculating addresses which only differ
//    by displacement inside a basic block, one of them is removed.
// 2) Address calculations in load and store instructions are replaced by
//    existing LEA def registers where possible.
//
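// As a rough illustrative sketch (simplified MIR, not taken from an actual
// test case), optimization 1 rewrites
//   %1 = LEA64r %base, 1, %idx, 8, $noreg
//   %2 = LEA64r %base, 1, %idx, 24, $noreg
//   %v = MOV64rm %2, 1, $noreg, 0, $noreg
// into
//   %1 = LEA64r %base, 1, %idx, 8, $noreg
//   %v = MOV64rm %1, 1, $noreg, 16, $noreg
// while optimization 2 similarly folds a load's or store's full address
// computation into a use of an already available LEA def register plus an
// adjusted displacement.
//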
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "x86-optimize-LEAs"
static cl::opt<bool>
    DisableX86LEAOpt("disable-x86-lea-opt", cl::Hidden,
                     cl::desc("X86: Disable LEA optimizations."),
                     cl::init(false));

STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");

/// Returns true if two machine operands are identical and they are not
/// physical registers.
static inline bool isIdenticalOp(const MachineOperand &MO1,
                                 const MachineOperand &MO2);

/// Returns true if two address displacement operands are of the same
/// type and use the same symbol/index/address regardless of the offset.
static bool isSimilarDispOp(const MachineOperand &MO1,
                            const MachineOperand &MO2);

/// Returns true if the instruction is LEA.
static inline bool isLEA(const MachineInstr &MI);
namespace {

/// A key based on instruction's memory operands.
class MemOpKey {
public:
  MemOpKey(const MachineOperand *Base, const MachineOperand *Scale,
           const MachineOperand *Index, const MachineOperand *Segment,
           const MachineOperand *Disp)
      : Disp(Disp) {
    Operands[0] = Base;
    Operands[1] = Scale;
    Operands[2] = Index;
    Operands[3] = Segment;
  }

  bool operator==(const MemOpKey &Other) const {
    // Addresses' bases, scales, indices and segments must be identical.
    for (int i = 0; i < 4; ++i)
      if (!isIdenticalOp(*Operands[i], *Other.Operands[i]))
        return false;

    // Addresses' displacements don't have to be exactly the same. It only
    // matters that they use the same symbol/index/address. Immediates' or
    // offsets' differences will be taken care of during instruction
    // substitution.
    return isSimilarDispOp(*Disp, *Other.Disp);
  }

  // Address' base, scale, index and segment operands.
  const MachineOperand *Operands[4];

  // Address' displacement operand.
  const MachineOperand *Disp;
};

} // end anonymous namespace
namespace llvm {

/// Provide DenseMapInfo for MemOpKey.
template <> struct DenseMapInfo<MemOpKey> {
  using PtrInfo = DenseMapInfo<const MachineOperand *>;

  static inline MemOpKey getEmptyKey() {
    return MemOpKey(PtrInfo::getEmptyKey(), PtrInfo::getEmptyKey(),
                    PtrInfo::getEmptyKey(), PtrInfo::getEmptyKey(),
                    PtrInfo::getEmptyKey());
  }

  static inline MemOpKey getTombstoneKey() {
    return MemOpKey(PtrInfo::getTombstoneKey(), PtrInfo::getTombstoneKey(),
                    PtrInfo::getTombstoneKey(), PtrInfo::getTombstoneKey(),
                    PtrInfo::getTombstoneKey());
  }

  static unsigned getHashValue(const MemOpKey &Val) {
    // Checking any field of MemOpKey is enough to determine if the key is
    // empty or tombstone.
    assert(Val.Disp != PtrInfo::getEmptyKey() && "Cannot hash the empty key");
    assert(Val.Disp != PtrInfo::getTombstoneKey() &&
           "Cannot hash the tombstone key");

    hash_code Hash = hash_combine(*Val.Operands[0], *Val.Operands[1],
                                  *Val.Operands[2], *Val.Operands[3]);

    // If the address displacement is an immediate, it should not affect the
    // hash so that memory operands which differ only by immediate displacement
    // would have the same hash. If the address displacement is something else,
    // we should reflect symbol/index/address in the hash.
    switch (Val.Disp->getType()) {
    case MachineOperand::MO_Immediate:
      break;
    case MachineOperand::MO_ConstantPoolIndex:
    case MachineOperand::MO_JumpTableIndex:
      Hash = hash_combine(Hash, Val.Disp->getIndex());
      break;
    case MachineOperand::MO_ExternalSymbol:
      Hash = hash_combine(Hash, Val.Disp->getSymbolName());
      break;
    case MachineOperand::MO_GlobalAddress:
      Hash = hash_combine(Hash, Val.Disp->getGlobal());
      break;
    case MachineOperand::MO_BlockAddress:
      Hash = hash_combine(Hash, Val.Disp->getBlockAddress());
      break;
    case MachineOperand::MO_MCSymbol:
      Hash = hash_combine(Hash, Val.Disp->getMCSymbol());
      break;
    case MachineOperand::MO_MachineBasicBlock:
      Hash = hash_combine(Hash, Val.Disp->getMBB());
      break;
    default:
      llvm_unreachable("Invalid address displacement operand");
    }

    return (unsigned)Hash;
  }

  static bool isEqual(const MemOpKey &LHS, const MemOpKey &RHS) {
    // Checking any field of MemOpKey is enough to determine if the key is
    // empty or tombstone.
    if (RHS.Disp == PtrInfo::getEmptyKey())
      return LHS.Disp == PtrInfo::getEmptyKey();
    if (RHS.Disp == PtrInfo::getTombstoneKey())
      return LHS.Disp == PtrInfo::getTombstoneKey();
    return LHS == RHS;
  }
};

} // end namespace llvm
/// Returns a hash table key based on memory operands of \p MI. The
/// number of the first memory operand of \p MI is specified through \p N.
static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) {
  assert((isLEA(MI) || MI.mayLoadOrStore()) &&
         "The instruction must be a LEA, a load or a store");
  return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg),
                  &MI.getOperand(N + X86::AddrScaleAmt),
                  &MI.getOperand(N + X86::AddrIndexReg),
                  &MI.getOperand(N + X86::AddrSegmentReg),
                  &MI.getOperand(N + X86::AddrDisp));
}

static inline bool isIdenticalOp(const MachineOperand &MO1,
                                 const MachineOperand &MO2) {
  return MO1.isIdenticalTo(MO2) && (!MO1.isReg() || !MO1.getReg().isPhysical());
}

#ifndef NDEBUG
static bool isValidDispOp(const MachineOperand &MO) {
  return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() ||
         MO.isGlobal() || MO.isBlockAddress() || MO.isMCSymbol() || MO.isMBB();
}
#endif

static bool isSimilarDispOp(const MachineOperand &MO1,
                            const MachineOperand &MO2) {
  assert(isValidDispOp(MO1) && isValidDispOp(MO2) &&
         "Address displacement operand is not valid");
  return (MO1.isImm() && MO2.isImm()) ||
         (MO1.isCPI() && MO2.isCPI() && MO1.getIndex() == MO2.getIndex()) ||
         (MO1.isJTI() && MO2.isJTI() && MO1.getIndex() == MO2.getIndex()) ||
         (MO1.isSymbol() && MO2.isSymbol() &&
          MO1.getSymbolName() == MO2.getSymbolName()) ||
         (MO1.isGlobal() && MO2.isGlobal() &&
          MO1.getGlobal() == MO2.getGlobal()) ||
         (MO1.isBlockAddress() && MO2.isBlockAddress() &&
          MO1.getBlockAddress() == MO2.getBlockAddress()) ||
         (MO1.isMCSymbol() && MO2.isMCSymbol() &&
          MO1.getMCSymbol() == MO2.getMCSymbol()) ||
         (MO1.isMBB() && MO2.isMBB() && MO1.getMBB() == MO2.getMBB());
}

static inline bool isLEA(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
         Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
}
namespace {

class X86OptimizeLEAPass : public MachineFunctionPass {
public:
  X86OptimizeLEAPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 LEA Optimize"; }

  /// Loop over all of the basic blocks, replacing address calculations in
  /// load and store instructions when the address has already been
  /// calculated by a LEA. Also, remove redundant LEAs.
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>;

  /// Returns the distance between two instructions inside one basic block.
  /// A negative result means that the instructions occur in reverse order.
  int calcInstrDist(const MachineInstr &First, const MachineInstr &Last);

  /// Choose the best LEA instruction from the \p List to replace the
  /// address calculation in the \p MI instruction. Return the address
  /// displacement and the distance between \p MI and the chosen \p BestLEA in
  /// \p AddrDispShift and \p Dist.
  bool chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
                     const MachineInstr &MI, MachineInstr *&BestLEA,
                     int64_t &AddrDispShift, int &Dist);

  /// Returns the difference between addresses' displacements of \p MI1
  /// and \p MI2. The numbers of the first memory operands for the instructions
  /// are specified through \p N1 and \p N2.
  int64_t getAddrDispShift(const MachineInstr &MI1, unsigned N1,
                           const MachineInstr &MI2, unsigned N2) const;

  /// Returns true if the \p Last LEA instruction can be replaced by the
  /// \p First. The difference between displacements of the addresses calculated
  /// by these LEAs is returned in \p AddrDispShift. It'll be used for proper
  /// replacement of the \p Last LEA's uses with the \p First's def register.
  bool isReplaceable(const MachineInstr &First, const MachineInstr &Last,
                     int64_t &AddrDispShift) const;

  /// Find all LEA instructions in the basic block. Also, assign position
  /// numbers to all instructions in the basic block to speed up calculation of
  /// distance between them.
  void findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs);

  /// Removes redundant address calculations.
  bool removeRedundantAddrCalc(MemOpMap &LEAs);

  /// Replace debug value \p MI with a new debug value instruction using
  /// register \p NewReg with an appropriate offset and DIExpression to
  /// incorporate the address displacement \p AddrDispShift. Return the new
  /// debug value instruction.
  MachineInstr *replaceDebugValue(MachineInstr &MI, unsigned OldReg,
                                  unsigned NewReg, int64_t AddrDispShift);

  /// Removes LEAs which calculate similar addresses.
  bool removeRedundantLEAs(MemOpMap &LEAs);

  DenseMap<const MachineInstr *, unsigned> InstrPos;

  MachineRegisterInfo *MRI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
};

} // end anonymous namespace
char X86OptimizeLEAPass::ID = 0;

FunctionPass *llvm::createX86OptimizeLEAs() { return new X86OptimizeLEAPass(); }

INITIALIZE_PASS(X86OptimizeLEAPass, DEBUG_TYPE, "X86 optimize LEA pass", false,
                false)

int X86OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
                                      const MachineInstr &Last) {
  // Both instructions must be in the same basic block and they must be
  // present in InstrPos.
  assert(Last.getParent() == First.getParent() &&
         "Instructions are in different basic blocks");
  assert(InstrPos.find(&First) != InstrPos.end() &&
         InstrPos.find(&Last) != InstrPos.end() &&
         "Instructions' positions are undefined");

  return InstrPos[&Last] - InstrPos[&First];
}
// Find the best LEA instruction in the List to replace address recalculation in
// MI. Such LEA must meet these requirements:
// 1) The address calculated by the LEA differs only by the displacement from
//    the address used in MI.
// 2) The register class of the definition of the LEA is compatible with the
//    register class of the address base register of MI.
// 3) Displacement of the new memory operand should fit in 1 byte if possible.
// 4) The LEA should be as close to MI as possible, and prior to it if
//    possible.
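// As an illustrative example (hypothetical displacements), if two LEAs prior
// to MI would leave MI with displacements of 4 and 260 respectively, the one
// yielding 4 is kept even when the other LEA sits closer to MI, since a
// disp8 encoding is smaller than a disp32 one.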
bool X86OptimizeLEAPass::chooseBestLEA(
    const SmallVectorImpl<MachineInstr *> &List, const MachineInstr &MI,
    MachineInstr *&BestLEA, int64_t &AddrDispShift, int &Dist) {
  const MachineFunction *MF = MI.getParent()->getParent();
  const MCInstrDesc &Desc = MI.getDesc();
  int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags) +
                X86II::getOperandBias(Desc);

  BestLEA = nullptr;

  // Loop over all LEA instructions.
  for (auto *DefMI : List) {
    // Get new address displacement.
    int64_t AddrDispShiftTemp = getAddrDispShift(MI, MemOpNo, *DefMI, 1);

    // Make sure address displacement fits 4 bytes.
    if (!isInt<32>(AddrDispShiftTemp))
      continue;

    // Check that LEA def register can be used as MI address base. Some
    // instructions can use a limited set of registers as address base, for
    // example MOV8mr_NOREX. We could constrain the register class of the LEA
    // def to suit MI, however since this case is very rare and hard to
    // reproduce in a test it's just more reliable to skip the LEA.
    if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) !=
        MRI->getRegClass(DefMI->getOperand(0).getReg()))
      continue;

    // Choose the closest LEA instruction from the list, prior to MI if
    // possible. Note that we take the resulting address displacement into
    // account as well. Also note that the list is sorted by the order in which
    // the LEAs occur, so the break condition is pretty simple.
    int DistTemp = calcInstrDist(*DefMI, MI);
    assert(DistTemp != 0 &&
           "The distance between two different instructions cannot be zero");
    if (DistTemp > 0 || BestLEA == nullptr) {
      // Do not update the best LEA if the current one provides a displacement
      // which fits in 1 byte, while the new candidate does not.
      if (BestLEA != nullptr && !isInt<8>(AddrDispShiftTemp) &&
          isInt<8>(AddrDispShift))
        continue;

      BestLEA = DefMI;
      AddrDispShift = AddrDispShiftTemp;
      Dist = DistTemp;
    }

    // FIXME: Maybe we should not always stop at the first LEA after MI.
    if (DistTemp < 0)
      break;
  }

  return BestLEA != nullptr;
}
// Get the difference between the addresses' displacements of the two
// instructions \p MI1 and \p MI2. The numbers of the first memory operands are
// passed through \p N1 and \p N2.
int64_t X86OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1,
                                             unsigned N1,
                                             const MachineInstr &MI2,
                                             unsigned N2) const {
  const MachineOperand &Op1 = MI1.getOperand(N1 + X86::AddrDisp);
  const MachineOperand &Op2 = MI2.getOperand(N2 + X86::AddrDisp);

  assert(isSimilarDispOp(Op1, Op2) &&
         "Address displacement operands are not compatible");

  // After the assert above we can be sure that both operands are of the same
  // valid type and use the same symbol/index/address, thus displacement shift
  // calculation is rather simple.
  if (Op1.isJTI())
    return 0;
  return Op1.isImm() ? Op1.getImm() - Op2.getImm()
                     : Op1.getOffset() - Op2.getOffset();
}
// Check that the Last LEA can be replaced by the First LEA. To be so,
// these requirements must be met:
// 1) Addresses calculated by LEAs differ only by displacement.
// 2) Def registers of LEAs belong to the same class.
// 3) All uses of the Last LEA def register are replaceable, thus the
//    register is used only as address base.
bool X86OptimizeLEAPass::isReplaceable(const MachineInstr &First,
                                       const MachineInstr &Last,
                                       int64_t &AddrDispShift) const {
  assert(isLEA(First) && isLEA(Last) &&
         "The function works only with LEA instructions");

  // Make sure that LEA def registers belong to the same class. There may be
  // instructions (like MOV8mr_NOREX) which allow a limited set of registers to
  // be used as their operands, so we must be sure that replacing one LEA
  // with another won't lead to putting a wrong register in the instruction.
  if (MRI->getRegClass(First.getOperand(0).getReg()) !=
      MRI->getRegClass(Last.getOperand(0).getReg()))
    return false;

  // Get new address displacement.
  AddrDispShift = getAddrDispShift(Last, 1, First, 1);

  // Loop over all uses of the Last LEA to check that its def register is
  // used only as address base for memory accesses. If so, it can be
  // replaced; otherwise it cannot.
  for (auto &MO : MRI->use_nodbg_operands(Last.getOperand(0).getReg())) {
    MachineInstr &MI = *MO.getParent();

    // Get the number of the first memory operand.
    const MCInstrDesc &Desc = MI.getDesc();
    int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags);

    // If the use instruction has no memory operand - the LEA is not
    // replaceable.
    if (MemOpNo < 0)
      return false;

    MemOpNo += X86II::getOperandBias(Desc);

    // If the address base of the use instruction is not the LEA def register -
    // the LEA is not replaceable.
    if (!isIdenticalOp(MI.getOperand(MemOpNo + X86::AddrBaseReg), MO))
      return false;

    // If the LEA def register is used as any other operand of the use
    // instruction - the LEA is not replaceable.
    for (unsigned i = 0; i < MI.getNumOperands(); i++)
      if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) &&
          isIdenticalOp(MI.getOperand(i), MO))
        return false;

    // Check that the new address displacement will fit 4 bytes.
    if (MI.getOperand(MemOpNo + X86::AddrDisp).isImm() &&
        !isInt<32>(MI.getOperand(MemOpNo + X86::AddrDisp).getImm() +
                   AddrDispShift))
      return false;
  }

  return true;
}
void X86OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB,
                                  MemOpMap &LEAs) {
  unsigned Pos = 0;
  for (auto &MI : MBB) {
    // Assign the position number to the instruction. Note that we are going to
    // move some instructions during the optimization; however, there will
    // never be a need to move two instructions before any selected
    // instruction. So to avoid multiple position updates during moves we just
    // increase the position counter by two, leaving a free slot for
    // instructions which will be moved.
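    // (For instance, instructions get positions 2, 4, 6, ...; a LEA hoisted
    // in front of an instruction later takes the odd slot InstrPos[MI] - 1.)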
    InstrPos[&MI] = Pos += 2;

    if (isLEA(MI))
      LEAs[getMemOpKey(MI, 1)].push_back(const_cast<MachineInstr *>(&MI));
  }
}
// Try to find load and store instructions which recalculate addresses already
// calculated by some LEA and replace their memory operands with its def
// register.
bool X86OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
  bool Changed = false;

  assert(!LEAs.empty());
  MachineBasicBlock *MBB = (*LEAs.begin()->second.begin())->getParent();

  // Process all instructions in basic block.
  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
    // Instruction must be load or store.
    if (!MI.mayLoadOrStore())
      continue;

    // Get the number of the first memory operand.
    const MCInstrDesc &Desc = MI.getDesc();
    int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags);

    // If instruction has no memory operand - skip it.
    if (MemOpNo < 0)
      continue;

    MemOpNo += X86II::getOperandBias(Desc);

    // Do not call chooseBestLEA if there was no matching LEA.
    auto Insns = LEAs.find(getMemOpKey(MI, MemOpNo));
    if (Insns == LEAs.end())
      continue;

    // Get the best LEA instruction to replace address calculation.
    MachineInstr *DefMI;
    int64_t AddrDispShift;
    int Dist;
    if (!chooseBestLEA(Insns->second, MI, DefMI, AddrDispShift, Dist))
      continue;

    // If the LEA occurs before the current instruction, we can freely replace
    // the instruction. If the LEA occurs after, we can lift the LEA above the
    // instruction and thereby be able to replace it. Since the LEA and the
    // instruction have similar memory operands (and thus the same def
    // instructions for these operands), we can always do that without
    // worrying about using registers before their defs.
    if (Dist < 0) {
      DefMI->removeFromParent();
      MBB->insert(MachineBasicBlock::iterator(&MI), DefMI);
      InstrPos[DefMI] = InstrPos[&MI] - 1;

      // Make sure the instructions' position numbers are sane.
      assert(((InstrPos[DefMI] == 1 &&
               MachineBasicBlock::iterator(DefMI) == MBB->begin()) ||
              InstrPos[DefMI] >
                  InstrPos[&*std::prev(MachineBasicBlock::iterator(DefMI))]) &&
             "Instruction positioning is broken");
    }

    // Since we can possibly extend register lifetime, clear kill flags.
    MRI->clearKillFlags(DefMI->getOperand(0).getReg());

    ++NumSubstLEAs;
    LLVM_DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump(););

    // Change instruction operands.
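    // The rewritten address is simply [LEADefReg + AddrDispShift]: the base
    // becomes the LEA's def register, the scale collapses to 1, and the index
    // and segment registers are dropped.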
    MI.getOperand(MemOpNo + X86::AddrBaseReg)
        .ChangeToRegister(DefMI->getOperand(0).getReg(), false);
    MI.getOperand(MemOpNo + X86::AddrScaleAmt).ChangeToImmediate(1);
    MI.getOperand(MemOpNo + X86::AddrIndexReg)
        .ChangeToRegister(X86::NoRegister, false);
    MI.getOperand(MemOpNo + X86::AddrDisp).ChangeToImmediate(AddrDispShift);
    MI.getOperand(MemOpNo + X86::AddrSegmentReg)
        .ChangeToRegister(X86::NoRegister, false);

    LLVM_DEBUG(dbgs() << "OptimizeLEAs: Replaced by: "; MI.dump(););

    Changed = true;
  }

  return Changed;
}
MachineInstr *X86OptimizeLEAPass::replaceDebugValue(MachineInstr &MI,
                                                    unsigned OldReg,
                                                    unsigned NewReg,
                                                    int64_t AddrDispShift) {
  const DIExpression *Expr = MI.getDebugExpression();
  if (AddrDispShift != 0) {
    if (MI.isNonListDebugValue()) {
      Expr =
          DIExpression::prepend(Expr, DIExpression::StackValue, AddrDispShift);
    } else {
      // Update the Expression, appending an offset of `AddrDispShift` to the
      // Op corresponding to `OldReg`.
      SmallVector<uint64_t, 3> Ops;
      DIExpression::appendOffset(Ops, AddrDispShift);
      for (MachineOperand &Op : MI.getDebugOperandsForReg(OldReg)) {
        unsigned OpIdx = MI.getDebugOperandIndex(&Op);
        Expr = DIExpression::appendOpsToArg(Expr, Ops, OpIdx);
      }
    }
  }

  // Replace DBG_VALUE instruction with modified version.
  MachineBasicBlock *MBB = MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  bool IsIndirect = MI.isIndirectDebugValue();
  const MDNode *Var = MI.getDebugVariable();
  unsigned Opcode = MI.isNonListDebugValue() ? TargetOpcode::DBG_VALUE
                                             : TargetOpcode::DBG_VALUE_LIST;
  if (IsIndirect)
    assert(MI.getDebugOffset().getImm() == 0 &&
           "DBG_VALUE with nonzero offset");
  SmallVector<MachineOperand, 4> NewOps;
  // If we encounter an operand using the old register, replace it with an
  // operand that uses the new register; otherwise keep the old operand.
  auto replaceOldReg = [OldReg, NewReg](const MachineOperand &Op) {
    if (Op.isReg() && Op.getReg() == OldReg)
      return MachineOperand::CreateReg(NewReg, false, false, false, false,
                                       false, false, false, false, false,
                                       /*IsRenamable*/ true);
    return Op;
  };
  for (const MachineOperand &Op : MI.debug_operands())
    NewOps.push_back(replaceOldReg(Op));
  return BuildMI(*MBB, MBB->erase(&MI), DL, TII->get(Opcode), IsIndirect,
                 NewOps, Var, Expr);
}
// Try to find similar LEAs in the list and replace one with another.
bool X86OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
  bool Changed = false;

  // Loop over all entries in the table.
  for (auto &E : LEAs) {
    auto &List = E.second;

    // Loop over all LEA pairs.
    auto I1 = List.begin();
    while (I1 != List.end()) {
      MachineInstr &First = **I1;
      auto I2 = std::next(I1);
      while (I2 != List.end()) {
        MachineInstr &Last = **I2;
        int64_t AddrDispShift;

        // LEAs should be in occurrence order in the list, so we can freely
        // replace later LEAs with earlier ones.
        assert(calcInstrDist(First, Last) > 0 &&
               "LEAs must be in occurrence order in the list");

        // Check that the Last LEA instruction can be replaced by the First.
        if (!isReplaceable(First, Last, AddrDispShift)) {
          ++I2;
          continue;
        }

        // Loop over all uses of the Last LEA and update their operands. Note
        // that the correctness of this has already been checked in the
        // isReplaceable function.
        Register FirstVReg = First.getOperand(0).getReg();
        Register LastVReg = Last.getOperand(0).getReg();

        // We use MRI->use_empty here instead of the combination of
        // llvm::make_early_inc_range and MRI->use_operands because we could
        // replace two or more uses in a debug instruction in one iteration,
        // and that would deeply confuse llvm::make_early_inc_range.
        while (!MRI->use_empty(LastVReg)) {
          MachineOperand &MO = *MRI->use_begin(LastVReg);
          MachineInstr &MI = *MO.getParent();

          if (MI.isDebugValue()) {
            // Replace DBG_VALUE instruction with modified version using the
            // register from the replacing LEA and the address displacement
            // between the LEA instructions.
            replaceDebugValue(MI, LastVReg, FirstVReg, AddrDispShift);
            continue;
          }

          // Get the number of the first memory operand.
          const MCInstrDesc &Desc = MI.getDesc();
          int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags) +
                        X86II::getOperandBias(Desc);

          // Update address base.
          MO.setReg(FirstVReg);

          // Update address disp.
          MachineOperand &Op = MI.getOperand(MemOpNo + X86::AddrDisp);
          if (Op.isImm())
            Op.setImm(Op.getImm() + AddrDispShift);
          else if (!Op.isJTI())
            Op.setOffset(Op.getOffset() + AddrDispShift);
        }

        // Since we can possibly extend register lifetime, clear kill flags.
        MRI->clearKillFlags(FirstVReg);

        ++NumRedundantLEAs;
        LLVM_DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: ";
                   Last.dump(););

        // By this point, all of the Last LEA's uses must have been replaced,
        // so we can freely remove it.
        assert(MRI->use_empty(LastVReg) &&
               "The LEA's def register must have no uses");
        Last.eraseFromParent();

        // Erase removed LEA from the list.
        I2 = List.erase(I2);

        Changed = true;
      }
      ++I1;
    }
  }

  return Changed;
}
bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  if (DisableX86LEAOpt || skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
  TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto *MBFI = (PSI && PSI->hasProfileSummary())
                   ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
                   : nullptr;

  // Process all basic blocks.
  for (auto &MBB : MF) {
    MemOpMap LEAs;
    InstrPos.clear();

    // Find all LEA instructions in basic block.
    findLEAs(MBB, LEAs);

    // If current basic block has no LEAs, move on to the next one.
    if (LEAs.empty())
      continue;

    // Remove redundant LEA instructions.
    Changed |= removeRedundantLEAs(LEAs);

    // Remove redundant address calculations. Do it only for -Os/-Oz since only
    // a code size gain is expected from this part of the pass.
    bool OptForSize = MF.getFunction().hasOptSize() ||
                      llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
    if (OptForSize)
      Changed |= removeRedundantAddrCalc(LEAs);
  }

  return Changed;
}