//===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the classes providing information
// about existing X86 FMA3 opcodes, classifying and grouping them.
//
//===----------------------------------------------------------------------===//

#include "X86InstrFMA3Info.h"
#include "X86InstrInfo.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Threading.h"
#include <atomic>
#include <cassert>
#include <cstdint>

using namespace llvm;

  20. #define FMA3GROUP(Name, Suf, Attrs) \
  21. { { X86::Name##132##Suf, X86::Name##213##Suf, X86::Name##231##Suf }, Attrs },
  22. #define FMA3GROUP_MASKED(Name, Suf, Attrs) \
  23. FMA3GROUP(Name, Suf, Attrs) \
  24. FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
  25. FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)
  26. #define FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
  27. FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
  28. FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
  29. FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs) \
  30. FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs) \
  31. FMA3GROUP_MASKED(Name, Suf##Zm, Attrs) \
  32. FMA3GROUP_MASKED(Name, Suf##Zr, Attrs) \
  33. #define FMA3GROUP_PACKED_WIDTHS_ALL(Name, Suf, Attrs) \
  34. FMA3GROUP(Name, Suf##Ym, Attrs) \
  35. FMA3GROUP(Name, Suf##Yr, Attrs) \
  36. FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
  37. FMA3GROUP(Name, Suf##m, Attrs) \
  38. FMA3GROUP(Name, Suf##r, Attrs)
  39. #define FMA3GROUP_PACKED(Name, Attrs) \
  40. FMA3GROUP_PACKED_WIDTHS_ALL(Name, PD, Attrs) \
  41. FMA3GROUP_PACKED_WIDTHS_Z(Name, PH, Attrs) \
  42. FMA3GROUP_PACKED_WIDTHS_ALL(Name, PS, Attrs)
  43. #define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
  44. FMA3GROUP(Name, Suf##Zm, Attrs) \
  45. FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
  46. FMA3GROUP(Name, Suf##Zr, Attrs) \
  47. FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
  48. #define FMA3GROUP_SCALAR_WIDTHS_ALL(Name, Suf, Attrs) \
  49. FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
  50. FMA3GROUP(Name, Suf##m, Attrs) \
  51. FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
  52. FMA3GROUP(Name, Suf##r, Attrs) \
  53. FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)
  54. #define FMA3GROUP_SCALAR(Name, Attrs) \
  55. FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SD, Attrs) \
  56. FMA3GROUP_SCALAR_WIDTHS_Z(Name, SH, Attrs) \
  57. FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SS, Attrs)
  58. #define FMA3GROUP_FULL(Name, Attrs) \
  59. FMA3GROUP_PACKED(Name, Attrs) \
  60. FMA3GROUP_SCALAR(Name, Attrs)
  61. static const X86InstrFMA3Group Groups[] = {
  62. FMA3GROUP_FULL(VFMADD, 0)
  63. FMA3GROUP_PACKED(VFMADDSUB, 0)
  64. FMA3GROUP_FULL(VFMSUB, 0)
  65. FMA3GROUP_PACKED(VFMSUBADD, 0)
  66. FMA3GROUP_FULL(VFNMADD, 0)
  67. FMA3GROUP_FULL(VFNMSUB, 0)
  68. };
  69. #define FMA3GROUP_PACKED_AVX512_WIDTHS(Name, Type, Suf, Attrs) \
  70. FMA3GROUP_MASKED(Name, Type##Z128##Suf, Attrs) \
  71. FMA3GROUP_MASKED(Name, Type##Z256##Suf, Attrs) \
  72. FMA3GROUP_MASKED(Name, Type##Z##Suf, Attrs)
  73. #define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs) \
  74. FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
  75. FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
  76. FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
  77. #define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
  78. FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
  79. FMA3GROUP_MASKED(Name, PHZ##Suf, Attrs) \
  80. FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)
  81. #define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs) \
  82. FMA3GROUP(Name, SDZ##Suf, Attrs) \
  83. FMA3GROUP_MASKED(Name, SDZ##Suf##_Int, Attrs) \
  84. FMA3GROUP(Name, SHZ##Suf, Attrs) \
  85. FMA3GROUP_MASKED(Name, SHZ##Suf##_Int, Attrs) \
  86. FMA3GROUP(Name, SSZ##Suf, Attrs) \
  87. FMA3GROUP_MASKED(Name, SSZ##Suf##_Int, Attrs)
  88. static const X86InstrFMA3Group BroadcastGroups[] = {
  89. FMA3GROUP_PACKED_AVX512(VFMADD, mb, 0)
  90. FMA3GROUP_PACKED_AVX512(VFMADDSUB, mb, 0)
  91. FMA3GROUP_PACKED_AVX512(VFMSUB, mb, 0)
  92. FMA3GROUP_PACKED_AVX512(VFMSUBADD, mb, 0)
  93. FMA3GROUP_PACKED_AVX512(VFNMADD, mb, 0)
  94. FMA3GROUP_PACKED_AVX512(VFNMSUB, mb, 0)
  95. };
  96. static const X86InstrFMA3Group RoundGroups[] = {
  97. FMA3GROUP_PACKED_AVX512_ROUND(VFMADD, rb, 0)
  98. FMA3GROUP_SCALAR_AVX512_ROUND(VFMADD, rb, X86InstrFMA3Group::Intrinsic)
  99. FMA3GROUP_PACKED_AVX512_ROUND(VFMADDSUB, rb, 0)
  100. FMA3GROUP_PACKED_AVX512_ROUND(VFMSUB, rb, 0)
  101. FMA3GROUP_SCALAR_AVX512_ROUND(VFMSUB, rb, X86InstrFMA3Group::Intrinsic)
  102. FMA3GROUP_PACKED_AVX512_ROUND(VFMSUBADD, rb, 0)
  103. FMA3GROUP_PACKED_AVX512_ROUND(VFNMADD, rb, 0)
  104. FMA3GROUP_SCALAR_AVX512_ROUND(VFNMADD, rb, X86InstrFMA3Group::Intrinsic)
  105. FMA3GROUP_PACKED_AVX512_ROUND(VFNMSUB, rb, 0)
  106. FMA3GROUP_SCALAR_AVX512_ROUND(VFNMSUB, rb, X86InstrFMA3Group::Intrinsic)
  107. };
  108. static void verifyTables() {
  109. #ifndef NDEBUG
  110. static std::atomic<bool> TableChecked(false);
  111. if (!TableChecked.load(std::memory_order_relaxed)) {
  112. assert(llvm::is_sorted(Groups) && llvm::is_sorted(RoundGroups) &&
  113. llvm::is_sorted(BroadcastGroups) && "FMA3 tables not sorted!");
  114. TableChecked.store(true, std::memory_order_relaxed);
  115. }
  116. #endif
  117. }
  118. /// Returns a reference to a group of FMA3 opcodes to where the given
  119. /// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
  120. /// and not included into any FMA3 group, then nullptr is returned.
  121. const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
  122. // FMA3 instructions have a well defined encoding pattern we can exploit.
  123. uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
  124. bool IsFMA3Opcode = ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
  125. (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
  126. (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
  127. bool IsFMA3Encoding = ((TSFlags & X86II::EncodingMask) == X86II::VEX &&
  128. (TSFlags & X86II::OpMapMask) == X86II::T8) ||
  129. ((TSFlags & X86II::EncodingMask) == X86II::EVEX &&
  130. ((TSFlags & X86II::OpMapMask) == X86II::T8 ||
  131. (TSFlags & X86II::OpMapMask) == X86II::T_MAP6));
  132. bool IsFMA3Prefix = (TSFlags & X86II::OpPrefixMask) == X86II::PD;
  133. if (!IsFMA3Opcode || !IsFMA3Encoding || !IsFMA3Prefix)
  134. return nullptr;
  135. verifyTables();
  136. ArrayRef<X86InstrFMA3Group> Table;
  137. if (TSFlags & X86II::EVEX_RC)
  138. Table = makeArrayRef(RoundGroups);
  139. else if (TSFlags & X86II::EVEX_B)
  140. Table = makeArrayRef(BroadcastGroups);
  141. else
  142. Table = makeArrayRef(Groups);
  143. // FMA 132 instructions have an opcode of 0x96-0x9F
  144. // FMA 213 instructions have an opcode of 0xA6-0xAF
  145. // FMA 231 instructions have an opcode of 0xB6-0xBF
  146. unsigned FormIndex = ((BaseOpcode - 0x90) >> 4) & 0x3;
  147. auto I = partition_point(Table, [=](const X86InstrFMA3Group &Group) {
  148. return Group.Opcodes[FormIndex] < Opcode;
  149. });
  150. assert(I != Table.end() && I->Opcodes[FormIndex] == Opcode &&
  151. "Couldn't find FMA3 opcode!");
  152. return I;
  153. }