//===- X86EvexToVex.cpp ---------------------------------------------------===//
// Compress EVEX instructions to VEX encoding when possible to reduce code size
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines the pass that goes over all AVX-512 instructions which
/// are encoded using the EVEX prefix and, if possible, replaces them by their
/// corresponding VEX encoding, which is usually shorter by 2 bytes.
/// EVEX instructions may be encoded via the VEX prefix when the AVX-512
/// instruction has a corresponding AVX/AVX2 opcode, when the vector length
/// accessed by the instruction is less than 512 bits, and when it does not
/// use the zmm or mask registers or xmm/ymm registers with indexes higher
/// than 15.
/// The pass applies this code-size reduction to the generated code for
/// AVX-512 instructions.
//
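// Illustrative example (exact byte counts depend on the operands): the
// 128-bit "vaddps %xmm2, %xmm1, %xmm0" selected as the EVEX-encoded
// VADDPSZ128rr carries a 4-byte EVEX prefix, while the equivalent
// VEX-encoded VADDPSrr needs only a 2- or 3-byte VEX prefix, so the rewrite
// typically saves 1-2 bytes per instruction.
//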
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86InstComments.h"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include <atomic>
#include <cassert>
#include <cstdint>

using namespace llvm;

// Including the generated EVEX2VEX tables.
struct X86EvexToVexCompressTableEntry {
  uint16_t EvexOpcode;
  uint16_t VexOpcode;

  bool operator<(const X86EvexToVexCompressTableEntry &RHS) const {
    return EvexOpcode < RHS.EvexOpcode;
  }

  friend bool operator<(const X86EvexToVexCompressTableEntry &TE,
                        unsigned Opc) {
    return TE.EvexOpcode < Opc;
  }
};
#include "X86GenEVEX2VEXTables.inc"
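// The generated file is expected to define the sorted
// X86EvexToVex128CompressTable and X86EvexToVex256CompressTable arrays of
// the entry type above, plus the CheckVEXInstPredicate() helper consulted
// before an instruction is compressed below.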

#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible"
#define EVEX2VEX_NAME "x86-evex-to-vex-compress"

#define DEBUG_TYPE EVEX2VEX_NAME

namespace {

class EvexToVexInstPass : public MachineFunctionPass {
  /// For EVEX instructions that can be encoded using VEX encoding, replace
  /// them by the VEX encoding in order to reduce size.
  bool CompressEvexToVexImpl(MachineInstr &MI) const;

public:
  static char ID;

  EvexToVexInstPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return EVEX2VEX_DESC; }

  /// Loop over all of the basic blocks, replacing EVEX instructions
  /// by equivalent VEX instructions when possible for reducing code size.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  /// Machine instruction info used throughout the class.
  const X86InstrInfo *TII = nullptr;

  const X86Subtarget *ST = nullptr;
};

} // end anonymous namespace

char EvexToVexInstPass::ID = 0;

bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
  TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
  ST = &MF.getSubtarget<X86Subtarget>();
  if (!ST->hasAVX512())
    return false;

  bool Changed = false;

  // Go over all basic blocks in function and replace
  // EVEX encoded instrs by VEX encoding when possible.
  for (MachineBasicBlock &MBB : MF) {
    // Traverse the basic block.
    for (MachineInstr &MI : MBB)
      Changed |= CompressEvexToVexImpl(MI);
  }

  return Changed;
}

static bool usesExtendedRegister(const MachineInstr &MI) {
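  // A VEX prefix can only name registers 0-15; xmm16-xmm31 and ymm16-ymm31
  // need EVEX's extra register-extension bits, so any such operand blocks
  // the compression.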
  auto isHiRegIdx = [](unsigned Reg) {
    // Check for XMM register with indexes between 16 - 31.
    if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
      return true;

    // Check for YMM register with indexes between 16 - 31.
    if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
      return true;

    return false;
  };

  // Check that operands are not ZMM regs or
  // XMM/YMM regs with hi indexes between 16 - 31.
  for (const MachineOperand &MO : MI.explicit_operands()) {
    if (!MO.isReg())
      continue;

    Register Reg = MO.getReg();

    assert(!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31) &&
           "ZMM instructions should not be in the EVEX->VEX tables");

    if (isHiRegIdx(Reg))
      return true;
  }

  return false;
}

// Do any custom cleanup needed to finalize the conversion.
static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
                                     const X86Subtarget *ST) {
  (void)NewOpc;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case X86::VALIGNDZ128rri:
  case X86::VALIGNDZ128rmi:
  case X86::VALIGNQZ128rri:
  case X86::VALIGNQZ128rmi: {
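    // VALIGND/VALIGNQ express the shift count in 32-/64-bit elements, while
    // VPALIGNR expresses it in bytes, so the immediate must be scaled by the
    // element size below.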
    assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
           "Unexpected new opcode!");
    unsigned Scale = (Opc == X86::VALIGNQZ128rri ||
                      Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
    Imm.setImm(Imm.getImm() * Scale);
    break;
  }
  case X86::VSHUFF32X4Z256rmi:
  case X86::VSHUFF32X4Z256rri:
  case X86::VSHUFF64X2Z256rmi:
  case X86::VSHUFF64X2Z256rri:
  case X86::VSHUFI32X4Z256rmi:
  case X86::VSHUFI32X4Z256rri:
  case X86::VSHUFI64X2Z256rmi:
  case X86::VSHUFI64X2Z256rri: {
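    // The 256-bit VSHUFF/I*X* forms pick one 128-bit lane from each source
    // via imm bits 0 and 1, while VPERM2F128/VPERM2I128 uses 2-bit source
    // selectors in imm bits [1:0] and [5:4], where values 2 and 3 select the
    // second source. The remapping below builds the equivalent selector.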
    assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
            NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
           "Unexpected new opcode!");
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
    int64_t ImmVal = Imm.getImm();
    // Set bit 5, move bit 1 to bit 4, copy bit 0.
    Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
    break;
  }
  case X86::VRNDSCALEPDZ128rri:
  case X86::VRNDSCALEPDZ128rmi:
  case X86::VRNDSCALEPSZ128rri:
  case X86::VRNDSCALEPSZ128rmi:
  case X86::VRNDSCALEPDZ256rri:
  case X86::VRNDSCALEPDZ256rmi:
  case X86::VRNDSCALEPSZ256rri:
  case X86::VRNDSCALEPSZ256rmi:
  case X86::VRNDSCALESDZr:
  case X86::VRNDSCALESDZm:
  case X86::VRNDSCALESSZr:
  case X86::VRNDSCALESSZm:
  case X86::VRNDSCALESDZr_Int:
  case X86::VRNDSCALESDZm_Int:
  case X86::VRNDSCALESSZr_Int:
  case X86::VRNDSCALESSZm_Int: {
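    // VRNDSCALE uses imm bits [7:4] as a fixed-point scale that the VEX
    // VROUND* forms cannot express; bits [3:0] carry the same rounding
    // controls in both encodings, so only convert when the upper bits are
    // clear.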
    const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
    int64_t ImmVal = Imm.getImm();
    // Ensure that only bits 3:0 of the immediate are used.
    if ((ImmVal & 0xf) != ImmVal)
      return false;
    break;
  }
  }

  return true;
}

// For EVEX instructions that can be encoded using VEX encoding
// replace them by the VEX encoding in order to reduce size.
bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
  // VEX format.
  // # of bytes: 0,2,3  1      1      0,1     0,1,2,4  0,1
  //  [Prefixes] [VEX]  OPCODE ModR/M [SIB]   [DISP]   [IMM]
  //
  // EVEX format.
  //  # of bytes:    4    1      1      1      4       / 1         1
  //  [Prefixes]  EVEX   Opcode ModR/M [SIB]  [Disp32] / [Disp8*N] [Immediate]
  const MCInstrDesc &Desc = MI.getDesc();

  // Check for EVEX instructions only.
  if ((Desc.TSFlags & X86II::EncodingMask) != X86II::EVEX)
    return false;

  // Check for EVEX instructions with mask or broadcast as in these cases
  // the EVEX prefix is needed in order to carry this information
  // thus preventing the transformation to VEX encoding.
  if (Desc.TSFlags & (X86II::EVEX_K | X86II::EVEX_B))
    return false;

  // Check for EVEX instructions with L2 set. These instructions are 512-bits
  // and can't be converted to VEX.
  if (Desc.TSFlags & X86II::EVEX_L2)
    return false;

#ifndef NDEBUG
  // Make sure the tables are sorted.
  static std::atomic<bool> TableChecked(false);
  if (!TableChecked.load(std::memory_order_relaxed)) {
    assert(llvm::is_sorted(X86EvexToVex128CompressTable) &&
           "X86EvexToVex128CompressTable is not sorted!");
    assert(llvm::is_sorted(X86EvexToVex256CompressTable) &&
           "X86EvexToVex256CompressTable is not sorted!");
    TableChecked.store(true, std::memory_order_relaxed);
  }
#endif

  // Use the VEX.L bit to select the 128 or 256-bit table.
  ArrayRef<X86EvexToVexCompressTableEntry> Table =
    (Desc.TSFlags & X86II::VEX_L) ? makeArrayRef(X86EvexToVex256CompressTable)
                                  : makeArrayRef(X86EvexToVex128CompressTable);
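  // Binary-search the selected table for this instruction's EVEX opcode; the
  // friend operator< declared with X86EvexToVexCompressTableEntry provides
  // the entry-vs-opcode comparison used here.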
  const auto *I = llvm::lower_bound(Table, MI.getOpcode());
  if (I == Table.end() || I->EvexOpcode != MI.getOpcode())
    return false;

  unsigned NewOpc = I->VexOpcode;

  if (usesExtendedRegister(MI))
    return false;

  if (!CheckVEXInstPredicate(MI, ST))
    return false;

  if (!performCustomAdjustments(MI, NewOpc, ST))
    return false;

  MI.setDesc(TII->get(NewOpc));
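  // Record the compression with the AC_EVEX_2_VEX asm-printer flag (declared
  // in X86InstComments.h) so the assembly printer can annotate the rewritten
  // instruction in its comment output.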
  MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
  return true;
}

INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)

FunctionPass *llvm::createX86EvexToVexInsts() {
  return new EvexToVexInstPass();
}