X86InstrAMX.td 10 KB


  1. //===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file describes the instructions that make up the Intel AMX instruction
  10. // set.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //===----------------------------------------------------------------------===//
  14. // AMX instructions
  15. let Predicates = [HasAMXTILE, In64BitMode] in { // Every def in this scope requires both predicates.
  16. let SchedRW = [WriteSystem] in { // One scheduling class is shared by all defs in this scope.
  17. let hasSideEffects = 1,
  18. Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in // Declared as defining all eight tile registers.
  19. def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
  20. "ldtilecfg\t$src",
  21. [(int_x86_ldtilecfg addr:$src)]>, VEX, T8PS; // Selected directly from the int_x86_ldtilecfg intrinsic.
  22. let hasSideEffects = 1 in
  23. def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
  24. "sttilecfg\t$src",
  25. [(int_x86_sttilecfg addr:$src)]>, VEX, T8PD; // Same opcode and form as LDTILECFG; only the trailing class (T8PD vs T8PS) differs.
  26. let mayLoad = 1 in
  27. def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
  28. (ins sibmem:$src),
  29. "tileloadd\t{$src, $dst|$dst, $src}", []>, // Empty pattern list: no selection pattern is attached here.
  30. VEX, T8XD;
  31. let mayLoad = 1 in
  32. def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
  33. (ins sibmem:$src),
  34. "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
  35. VEX, T8PD; // Same opcode and form as TILELOADD; only the trailing class (T8PD vs T8XD) differs.
  36. let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in // Declared as defining all eight tile registers.
  37. def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
  38. "tilerelease", [(int_x86_tilerelease)]>, VEX, T8PS;
  39. let mayStore = 1 in
  40. def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs),
  41. (ins sibmem:$dst, TILE:$src),
  42. "tilestored\t{$src, $dst|$dst, $src}", []>,
  43. VEX, T8XS;
  44. def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
  45. "tilezero\t$dst", []>,
  46. VEX, T8XD;
  47. // Pseudo instructions for RA. They have no asm string; the first two GR16 inputs are presumably the tile shape -- TODO confirm.
  48. let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
  49. Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in // Same Defs list as LDTILECFG.
  50. def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
  51. let isPseudo = true, mayLoad = 1 in
  52. def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
  53. GR16:$src2,
  54. opaquemem:$src3), []>;
  55. let isPseudo = true, mayLoad = 1 in
  56. def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
  57. GR16:$src2,
  58. opaquemem:$src3), []>;
  59. let isPseudo = true, mayStore = 1 in
  60. def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
  61. GR16:$src2, opaquemem:$src3,
  62. TILE:$src4), []>; // The stored tile is the last input; there is no output operand.
  63. let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
  64. canFoldAsLoad = 1 in
  65. def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
  66. [(set TILE:$dst, (int_x86_tilezero_internal
  67. GR16:$src1, GR16:$src2))]>; // The only pseudo in this group that carries a selection pattern.
  68. let usesCustomInserter = 1 in {
  69. // Pseudo instructions, using immediates instead of tile registers.
  70. // To be translated to the actual instructions in X86ISelLowering.cpp
  71. let mayLoad = 1 in
  72. def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
  73. let mayLoad = 1 in
  74. def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
  75. sibmem:$src2), []>;
  76. let mayStore = 1 in
  77. def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>; // NOTE(review): i8mem here, while the load pseudos above use sibmem -- confirm this is intended.
  78. def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
  79. [(int_x86_tilezero timm:$src)]>;
  80. } // usesCustomInserter
  81. } // SchedRW
  82. } // HasAMXTILE
  83. let Predicates = [HasAMXINT8, In64BitMode] in { // Every def in this scope requires both predicates.
  84. let SchedRW = [WriteSystem] in { // One scheduling class is shared by all defs in this scope.
  85. let Constraints = "$src1 = $dst" in { // The accumulator input is tied to the output tile.
  86. def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
  87. (ins TILE:$src1, TILE:$src2, TILE:$src3),
  88. "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, // The asm string prints $dst, not the tied $src1.
  89. VEX_4V, T8XD;
  90. def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
  91. (ins TILE:$src1, TILE:$src2, TILE:$src3),
  92. "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
  93. VEX_4V, T8XS;
  94. def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
  95. (ins TILE:$src1, TILE:$src2, TILE:$src3),
  96. "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
  97. VEX_4V, T8PD;
  98. def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
  99. (ins TILE:$src1, TILE:$src2, TILE:$src3),
  100. "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
  101. VEX_4V, T8PS; // The four defs share opcode 0x5e and form; only the trailing class (T8XD/T8XS/T8PD/T8PS) differs.
  102. } // Constraints
  103. // Pseudo instructions for RA. Each is selected from the matching *_internal intrinsic; the GR16 inputs are presumably tile dimensions -- TODO confirm.
  104. let isPseudo = true, Constraints = "$src4 = $dst" in { // Here the tied accumulator is the fourth input.
  105. def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
  106. GR16:$src2, GR16:$src3, TILE:$src4,
  107. TILE:$src5, TILE:$src6),
  108. [(set TILE: $dst,
  109. (int_x86_tdpbssd_internal GR16:$src1, GR16:$src2,
  110. GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
  111. def PTDPBSUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
  112. GR16:$src2, GR16:$src3, TILE:$src4,
  113. TILE:$src5, TILE:$src6),
  114. [(set TILE: $dst,
  115. (int_x86_tdpbsud_internal GR16:$src1, GR16:$src2,
  116. GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
  117. def PTDPBUSDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
  118. GR16:$src2, GR16:$src3, TILE:$src4,
  119. TILE:$src5, TILE:$src6),
  120. [(set TILE: $dst,
  121. (int_x86_tdpbusd_internal GR16:$src1, GR16:$src2,
  122. GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
  123. def PTDPBUUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
  124. GR16:$src2, GR16:$src3, TILE:$src4,
  125. TILE:$src5, TILE:$src6),
  126. [(set TILE: $dst,
  127. (int_x86_tdpbuud_internal GR16:$src1, GR16:$src2,
  128. GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
  129. } // isPseudo, Constraints
  130. let usesCustomInserter = 1 in {
  131. // Pseudo instructions, using immediates instead of tile registers.
  132. // To be translated to the actual instructions in X86ISelLowering.cpp
  133. def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
  134. u8imm:$src2, u8imm:$src3),
  135. [(int_x86_tdpbssd timm:$src1,
  136. timm:$src2, timm:$src3)]>; // All three operands are matched as target immediates (timm).
  137. def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
  138. u8imm:$src2, u8imm:$src3),
  139. [(int_x86_tdpbsud timm:$src1,
  140. timm:$src2, timm:$src3)]>;
  141. def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
  142. u8imm:$src2, u8imm:$src3),
  143. [(int_x86_tdpbusd timm:$src1,
  144. timm:$src2, timm:$src3)]>;
  145. def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
  146. u8imm:$src2, u8imm:$src3),
  147. [(int_x86_tdpbuud timm:$src1,
  148. timm:$src2, timm:$src3)]>;
  149. } // usesCustomInserter
  150. } // SchedRW
  151. } // HasAMXINT8
  152. let Predicates = [HasAMXBF16, In64BitMode] in { // Every def in this scope requires both predicates.
  153. let SchedRW = [WriteSystem] in { // One scheduling class is shared by all defs in this scope.
  154. let Constraints = "$src1 = $dst" in // The accumulator input is tied to the output tile.
  155. def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
  156. (ins TILE:$src1, TILE:$src2, TILE:$src3),
  157. "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}", // The asm string prints $dst, not the tied $src1.
  158. []>, VEX_4V, T8XS;
  159. // Pseudo instruction for RA. The GR16 inputs are presumably tile dimensions -- TODO confirm.
  160. let isPseudo = true, Constraints = "$src4 = $dst" in // Here the tied accumulator is the fourth input.
  161. def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
  162. GR16:$src2, GR16:$src3, TILE:$src4,
  163. TILE:$src5, TILE:$src6),
  164. [(set TILE: $dst,
  165. (int_x86_tdpbf16ps_internal GR16:$src1,
  166. GR16:$src2, GR16:$src3, TILE:$src4,
  167. TILE:$src5, TILE:$src6))]>;
  168. let usesCustomInserter = 1 in {
  169. // Pseudo instructions, using immediates instead of tile registers.
  170. // To be translated to the actual instructions in X86ISelLowering.cpp
  171. def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
  172. u8imm:$src2, u8imm:$src3),
  173. [(int_x86_tdpbf16ps timm:$src1,
  174. timm:$src2, timm:$src3)]>; // All three operands are matched as target immediates (timm).
  175. } // usesCustomInserter
  176. } // SchedRW
  177. } // HasAMXBF16
  178. //AMX-FP16
  179. let Predicates = [HasAMXFP16, In64BitMode] in { // Every def in this scope requires both predicates.
  180. let SchedRW = [WriteSystem] in { // One scheduling class is shared by all defs in this scope.
  181. let Constraints = "$src1 = $dst" in { // The accumulator input is tied to the output tile.
  182. def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
  183. (ins TILE:$src1, TILE:$src2, TILE:$src3),
  184. "tdpfp16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}", // Prints $dst like the other AMX dot-product defs; $src1 is tied to it.
  185. []>, VEX_4V, T8XD;
  186. } // Constraints
  187. // Pseudo instruction for RA. The GR16 inputs are presumably tile dimensions -- TODO confirm.
  188. let isPseudo = true, Constraints = "$src4 = $dst" in { // Here the tied accumulator is the fourth input.
  189. def PTDPFP16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
  190. GR16:$src2, GR16:$src3, TILE:$src4,
  191. TILE:$src5, TILE:$src6),
  192. [(set TILE: $dst,
  193. (int_x86_tdpfp16ps_internal GR16:$src1,
  194. GR16:$src2, GR16:$src3, TILE:$src4,
  195. TILE:$src5, TILE:$src6))]>;
  196. } // isPseudo, Constraints
  197. let usesCustomInserter = 1 in { // Pseudo using immediates instead of tile registers, as in the other custom-inserter scopes of this file.
  198. def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
  199. u8imm:$src2, u8imm:$src3),
  200. [(int_x86_tdpfp16ps timm:$src1,
  201. timm:$src2, timm:$src3)]>; // All three operands are matched as target immediates (timm).
  202. } // usesCustomInserter
  203. } // SchedRW
  204. } // HasAMXFP16