//===- X86EvexToVex.cpp ---------------------------------------------------===//
// Compress EVEX instructions to VEX encoding when possible to reduce code size
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines the pass that goes over all AVX-512 instructions which
/// are encoded using the EVEX prefix and, if possible, replaces them by their
/// corresponding VEX encoding, which is usually shorter by 2 bytes.
/// EVEX instructions may be encoded via the VEX prefix when the AVX-512
/// instruction has a corresponding AVX/AVX2 opcode, when the vector length
/// accessed by the instruction is less than 512 bits, and when it does not use
/// the mask registers or xmm/ymm registers with indexes higher than 15.
/// The pass applies this code-size reduction to the generated code for
/// AVX-512 instructions.
//
//===----------------------------------------------------------------------===//
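//
// For example, a register-to-register 128-bit vaddps is 6 bytes when
// EVEX-encoded (4-byte prefix + opcode + ModRM) but only 4 bytes with the
// 2-byte VEX prefix, which is where the 2-byte saving mentioned above
// comes from.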

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86InstComments.h"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include <atomic>
#include <cassert>
#include <cstdint>

using namespace llvm;

// Including the generated EVEX2VEX tables.
struct X86EvexToVexCompressTableEntry {
  uint16_t EvexOpcode;
  uint16_t VexOpcode;

  bool operator<(const X86EvexToVexCompressTableEntry &RHS) const {
    return EvexOpcode < RHS.EvexOpcode;
  }

  friend bool operator<(const X86EvexToVexCompressTableEntry &TE,
                        unsigned Opc) {
    return TE.EvexOpcode < Opc;
  }
};
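
// The generated file below provides X86EvexToVex128CompressTable and
// X86EvexToVex256CompressTable, arrays of the entry type above sorted by
// EvexOpcode. The heterogeneous operator< is what lets llvm::lower_bound
// probe the tables directly with an opcode value.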

#include "X86GenEVEX2VEXTables.inc"

#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible"
#define EVEX2VEX_NAME "x86-evex-to-vex-compress"

#define DEBUG_TYPE EVEX2VEX_NAME

namespace {

class EvexToVexInstPass : public MachineFunctionPass {
  /// For EVEX instructions that can be encoded using VEX encoding, replace
  /// them by the VEX encoding in order to reduce size.
  bool CompressEvexToVexImpl(MachineInstr &MI) const;

public:
  static char ID;

  EvexToVexInstPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return EVEX2VEX_DESC; }

  /// Loop over all of the basic blocks, replacing EVEX instructions
  /// by equivalent VEX instructions when possible for reducing code size.
  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  /// Machine instruction info used throughout the class.
  const X86InstrInfo *TII = nullptr;

  const X86Subtarget *ST = nullptr;
};

} // end anonymous namespace

char EvexToVexInstPass::ID = 0;

bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
  TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
  ST = &MF.getSubtarget<X86Subtarget>();
  if (!ST->hasAVX512())
    return false;

  bool Changed = false;

  /// Go over all basic blocks in function and replace
  /// EVEX encoded instrs by VEX encoding when possible.
  for (MachineBasicBlock &MBB : MF) {
    // Traverse the basic block.
    for (MachineInstr &MI : MBB)
      Changed |= CompressEvexToVexImpl(MI);
  }

  return Changed;
}
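
// A VEX prefix provides only one extension bit per register field, so it can
// name at most XMM/YMM0-15; reaching registers 16-31 requires EVEX. Any
// instruction whose operands use those upper registers therefore cannot be
// compressed.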

static bool usesExtendedRegister(const MachineInstr &MI) {
  auto isHiRegIdx = [](unsigned Reg) {
    // Check for XMM register with indexes between 16 - 31.
    if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
      return true;

    // Check for YMM register with indexes between 16 - 31.
    if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
      return true;

    return false;
  };

  // Check that operands are not ZMM regs or
  // XMM/YMM regs with hi indexes between 16 - 31.
  for (const MachineOperand &MO : MI.explicit_operands()) {
    if (!MO.isReg())
      continue;

    Register Reg = MO.getReg();

    assert(!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31) &&
           "ZMM instructions should not be in the EVEX->VEX tables");

    if (isHiRegIdx(Reg))
      return true;
  }

  return false;
}

// Do any custom cleanup needed to finalize the conversion.
static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
                                     const X86Subtarget *ST) {
  (void)NewOpc;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case X86::VALIGNDZ128rri:
  case X86::VALIGNDZ128rmi:
  case X86::VALIGNQZ128rri:
  case X86::VALIGNQZ128rmi: {
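    // VALIGND/VALIGNQ rotate by 32/64-bit elements, while VPALIGNR's
    // immediate counts bytes, so the immediate is scaled by the element size
    // (e.g. a VALIGNQ shift of 1 element becomes a byte shift of 8). Only the
    // 128-bit forms map onto VPALIGNR, which operates per 128-bit lane.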
    assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
           "Unexpected new opcode!");
    unsigned Scale =
        (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    Imm.setImm(Imm.getImm() * Scale);
    break;
  }
  case X86::VSHUFF32X4Z256rmi:
  case X86::VSHUFF32X4Z256rri:
  case X86::VSHUFF64X2Z256rmi:
  case X86::VSHUFF64X2Z256rri:
  case X86::VSHUFI32X4Z256rmi:
  case X86::VSHUFI32X4Z256rri:
  case X86::VSHUFI64X2Z256rmi:
  case X86::VSHUFI64X2Z256rri: {
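    // The 256-bit VSHUFF/VSHUFI*X4 immediate uses bit 0 to pick the 128-bit
    // half of the first source and bit 1 to pick the half of the second,
    // whereas VPERM2F128/VPERM2I128 use bits 1:0 and 5:4 as full source
    // selectors. For example, an immediate of 0b10 is rewritten to 0x30.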
    assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
            NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
           "Unexpected new opcode!");
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
    // Set bit 5, move bit 1 to bit 4, copy bit 0.
    Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
    break;
  }
  case X86::VRNDSCALEPDZ128rri:
  case X86::VRNDSCALEPDZ128rmi:
  case X86::VRNDSCALEPSZ128rri:
  case X86::VRNDSCALEPSZ128rmi:
  case X86::VRNDSCALEPDZ256rri:
  case X86::VRNDSCALEPDZ256rmi:
  case X86::VRNDSCALEPSZ256rri:
  case X86::VRNDSCALEPSZ256rmi:
  case X86::VRNDSCALESDZr:
  case X86::VRNDSCALESDZm:
  case X86::VRNDSCALESSZr:
  case X86::VRNDSCALESSZm:
  case X86::VRNDSCALESDZr_Int:
  case X86::VRNDSCALESDZm_Int:
  case X86::VRNDSCALESSZr_Int:
  case X86::VRNDSCALESSZm_Int:
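    // VRNDSCALE's immediate bits 7:4 select a fixed-point scale that VROUND
    // cannot express; bits 3:0 (rounding control) have the same meaning in
    // both, so the compression is only legal when the upper bits are zero.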
    const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
    // Ensure that only bits 3:0 of the immediate are used.
    if ((ImmVal & 0xf) != ImmVal)
      return false;
    break;
  }

  return true;
}

// For EVEX instructions that can be encoded using VEX encoding
// replace them by the VEX encoding in order to reduce size.
bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
  // VEX format.
  // # of bytes: 0,2,3  1      1      0,1   0,1,2,4  0,1
  //  [Prefixes] [VEX]  OPCODE ModR/M [SIB] [DISP]   [IMM]
  //
  // EVEX format.
  //  # of bytes: 4    1      1      1      4       / 1         1
  //  [Prefixes]  EVEX Opcode ModR/M [SIB] [Disp32] / [Disp8*N] [Immediate]

  const MCInstrDesc &Desc = MI.getDesc();

  // Check for EVEX instructions only.
  if ((Desc.TSFlags & X86II::EncodingMask) != X86II::EVEX)
    return false;

  // Check for EVEX instructions with mask or broadcast as in these cases
  // the EVEX prefix is needed in order to carry this information
  // thus preventing the transformation to VEX encoding.
  if (Desc.TSFlags & (X86II::EVEX_K | X86II::EVEX_B))
    return false;

  // Check for EVEX instructions with L2 set. These instructions are 512-bits
  // and can't be converted to VEX.
  if (Desc.TSFlags & X86II::EVEX_L2)
    return false;

#ifndef NDEBUG
  // Make sure the tables are sorted.
  static std::atomic<bool> TableChecked(false);
  if (!TableChecked.load(std::memory_order_relaxed)) {
    assert(llvm::is_sorted(X86EvexToVex128CompressTable) &&
           "X86EvexToVex128CompressTable is not sorted!");
    assert(llvm::is_sorted(X86EvexToVex256CompressTable) &&
           "X86EvexToVex256CompressTable is not sorted!");
    TableChecked.store(true, std::memory_order_relaxed);
  }
#endif

  // Use the VEX.L bit to select the 128 or 256-bit table.
  ArrayRef<X86EvexToVexCompressTableEntry> Table =
      (Desc.TSFlags & X86II::VEX_L) ? ArrayRef(X86EvexToVex256CompressTable)
                                    : ArrayRef(X86EvexToVex128CompressTable);

  const auto *I = llvm::lower_bound(Table, MI.getOpcode());
  if (I == Table.end() || I->EvexOpcode != MI.getOpcode())
    return false;

  unsigned NewOpc = I->VexOpcode;

  if (usesExtendedRegister(MI))
    return false;
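  // CheckVEXInstPredicate comes from the generated tables include as well; it
  // rejects opcodes whose VEX form needs a subtarget feature that is not
  // available.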
  if (!CheckVEXInstPredicate(MI, ST))
    return false;

  if (!performCustomAdjustments(MI, NewOpc, ST))
    return false;

  MI.setDesc(TII->get(NewOpc));
  MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
  return true;
}

INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
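
// Factory hook declared in X86.h; the X86 backend uses it to add this pass to
// its codegen pipeline.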
FunctionPass *llvm::createX86EvexToVexInsts() {
  return new EvexToVexInstPass();
}